Logo ROOT  
Reference Guide
RLazyDSImpl.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RLAZYDSIMPL
12 #define ROOT_RLAZYDSIMPL
13 
#include "ROOT/RIntegerSequence.hxx"
#include "ROOT/RMakeUnique.hxx"
#include "ROOT/RDataSource.hxx"
#include "ROOT/RResultPtr.hxx"
#include "ROOT/TSeq.hxx"

#include <algorithm>
#include <map>
#include <stdexcept>
#include <string>
#include <tuple>
#include <typeinfo>
#include <vector>
26 
27 namespace ROOT {
28 
29 namespace RDF {
30 ////////////////////////////////////////////////////////////////////////////////////////////////
31 /// \brief A RDataSource implementation which is built on top of result proxies
32 ///
33 /// This component allows creating a data source from a set of columns coming from
34 /// one or multiple data frames. The processing of the parent data frames starts
35 /// only when the event loop is triggered in the data frame initialised with an
36 /// RLazyDS.
37 ///
38 /// The implementation takes care of matching compile-time information with runtime
39 /// information, e.g. by expanding the template parameter packs.
40 template <typename... ColumnTypes>
41 class RLazyDS final : public ROOT::RDF::RDataSource {
42  using PointerHolderPtrs_t = std::vector<ROOT::Internal::TDS::TPointerHolder *>;
43 
44  std::tuple<RResultPtr<std::vector<ColumnTypes>>...> fColumns;
45  const std::vector<std::string> fColNames;
46  const std::map<std::string, std::string> fColTypesMap;
47  // The role of the fPointerHoldersModels is to be initialised with the pack
48  // of arguments in the constrcutor signature at construction time
49  // Once the number of slots is known, the fPointerHolders are initialised
50  // according to the models.
52  std::vector<PointerHolderPtrs_t> fPointerHolders;
53  std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
54  unsigned int fNSlots{0};
55 
56  Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
57  {
58  auto colNameStr = std::string(colName);
59  // This could be optimised and done statically
60  const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
61  auto it = fColTypesMap.find(colNameStr);
62  if (fColTypesMap.end() == it) {
63  std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
64  throw std::runtime_error(err);
65  }
66 
67  const auto colIdName = it->second;
68  if (colIdName != idName) {
69  std::string err = "Column " + colNameStr + " has type " + colIdName +
70  " while the id specified is associated to type " + idName;
71  throw std::runtime_error(err);
72  }
73 
74  const auto colBegin = fColNames.begin();
75  const auto colEnd = fColNames.end();
76  const auto namesIt = std::find(colBegin, colEnd, colName);
77  const auto index = std::distance(colBegin, namesIt);
78 
79  Record_t ret(fNSlots);
80  for (auto slot : ROOT::TSeqU(fNSlots)) {
81  ret[slot] = fPointerHolders[index][slot]->GetPointerAddr();
82  }
83  return ret;
84  }
85 
86  size_t GetEntriesNumber() { return std::get<0>(fColumns)->size(); }
87  template <std::size_t... S>
88  void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
89  {
90  std::initializer_list<int> expander{
91  (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = (*std::get<S>(fColumns))[entry], 0)...};
92  (void)expander; // avoid unused variable warnings
93  }
94 
95  template <std::size_t... S>
96  void ColLenghtChecker(std::index_sequence<S...>)
97  {
98  if (sizeof...(S) < 2)
99  return;
100 
101  const std::vector<size_t> colLengths{std::get<S>(fColumns)->size()...};
102  const auto expectedLen = colLengths[0];
103  std::string err;
104  for (auto i : TSeqI(1, colLengths.size())) {
105  if (expectedLen != colLengths[i]) {
106  err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
107  "\" have different lengths: " + std::to_string(expectedLen) + " and " +
108  std::to_string(colLengths[i]);
109  }
110  }
111  if (!err.empty()) {
112  throw std::runtime_error(err);
113  }
114  }
115 
116 protected:
/// Short description of this data source.
std::string AsString()
{
   return "lazy data source";
}
118 
119 public:
120  RLazyDS(std::pair<std::string, RResultPtr<std::vector<ColumnTypes>>>... colsNameVals)
121  : fColumns(std::tuple<RResultPtr<std::vector<ColumnTypes>>...>(colsNameVals.second...)),
122  fColNames({colsNameVals.first...}),
123  fColTypesMap({{colsNameVals.first, ROOT::Internal::RDF::TypeID2TypeName(typeid(ColumnTypes))}...}),
125  {
126  }
127 
129  {
130  for (auto &&ptrHolderv : fPointerHolders) {
131  for (auto &&ptrHolder : ptrHolderv) {
132  delete ptrHolder;
133  }
134  }
135  }
136 
137  const std::vector<std::string> &GetColumnNames() const { return fColNames; }
138 
139  std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges()
140  {
141  auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
142  return entryRanges;
143  }
144 
145  std::string GetTypeName(std::string_view colName) const
146  {
147  const auto key = std::string(colName);
148  return fColTypesMap.at(key);
149  }
150 
151  bool HasColumn(std::string_view colName) const
152  {
153  const auto key = std::string(colName);
154  const auto endIt = fColTypesMap.end();
155  return endIt != fColTypesMap.find(key);
156  }
157 
158  bool SetEntry(unsigned int slot, ULong64_t entry)
159  {
160  SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
161  return true;
162  }
163 
164  void SetNSlots(unsigned int nSlots)
165  {
166  fNSlots = nSlots;
167  const auto nCols = fColNames.size();
168  fPointerHolders.resize(nCols); // now we need to fill it with the slots, all of the same type
169  auto colIndex = 0U;
170  for (auto &&ptrHolderv : fPointerHolders) {
171  for (auto slot : ROOT::TSeqI(fNSlots)) {
172  auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
173  ptrHolderv.emplace_back(ptrHolder);
174  (void)slot;
175  }
176  colIndex++;
177  }
178  for (auto &&ptrHolder : fPointerHoldersModels)
179  delete ptrHolder;
180  }
181 
182  void Initialise()
183  {
184  ColLenghtChecker(std::index_sequence_for<ColumnTypes...>());
185  const auto nEntries = GetEntriesNumber();
186  const auto nEntriesInRange = nEntries / fNSlots; // between integers. Should make smaller?
187  auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
188  fEntryRanges.resize(fNSlots);
189  auto init = 0ULL;
190  auto end = 0ULL;
191  for (auto &&range : fEntryRanges) {
192  end = init + nEntriesInRange;
193  if (0 != reminder) { // Distribute the reminder among the first chunks
194  reminder--;
195  end += 1;
196  }
197  range.first = init;
198  range.second = end;
199  init = end;
200  }
201  }
202 
/// Label identifying the type of this data source.
std::string GetLabel()
{
   return "LazyDS";
}
204 };
205 
206 } // ns RDF
207 
208 } // ns ROOT
209 
210 #endif
ROOT::RDF::RLazyDS::fPointerHoldersModels
const PointerHolderPtrs_t fPointerHoldersModels
Definition: RLazyDSImpl.hxx:51
ROOT::RDF::RLazyDS::SetEntry
bool SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition: RLazyDSImpl.hxx:158
ROOT::RDF::RLazyDS::HasColumn
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
Definition: RLazyDSImpl.hxx:151
ROOT::RDF::RLazyDS::fColNames
const std::vector< std::string > fColNames
Definition: RLazyDSImpl.hxx:45
ROOT::RDF::RLazyDS::ColLenghtChecker
void ColLenghtChecker(std::index_sequence< S... >)
Definition: RLazyDSImpl.hxx:96
ROOT::RDF::RLazyDS::RLazyDS
RLazyDS(std::pair< std::string, RResultPtr< std::vector< ColumnTypes >>>... colsNameVals)
Definition: RLazyDSImpl.hxx:120
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:785
BatchHelpers::init
EvaluateInfo init(std::vector< RooRealProxy > parameters, std::vector< ArrayWrapper * > wrappers, std::vector< double * > arrays, size_t begin, size_t batchSize)
ROOT::TSeqI
TSeq< int > TSeqI
Definition: TSeq.hxx:194
ROOT::RDF::RLazyDS::AsString
std::string AsString()
Definition: RLazyDSImpl.hxx:117
RooFitShortHand::S
RooArgSet S(const RooAbsArg &v1)
Definition: RooGlobalFunc.cxx:354
ROOT::RDF::RResultPtr
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:79
ROOT::RDF::RLazyDS::SetNSlots
void SetNSlots(unsigned int nSlots)
Inform RDataSource of the number of processing slots (i.e. worker threads) used by the associated RDataFrame.
Definition: RLazyDSImpl.hxx:164
ROOT::RDF::RDataSource
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Definition: RDataSource.hxx:106
ROOT::RDF::RLazyDS::fNSlots
unsigned int fNSlots
Definition: RLazyDSImpl.hxx:54
RDataSource.hxx
ROOT::RDF::RLazyDS
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
RIntegerSequence.hxx
ROOT::RDF::RLazyDS::fColTypesMap
const std::map< std::string, std::string > fColTypesMap
Definition: RLazyDSImpl.hxx:46
ROOT::RDF::RLazyDS::fPointerHolders
std::vector< PointerHolderPtrs_t > fPointerHolders
Definition: RLazyDSImpl.hxx:52
ROOT::RDF::RLazyDS::GetEntryRanges
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
Definition: RLazyDSImpl.hxx:139
TSeq.hxx
ROOT::RDF::RLazyDS::GetEntriesNumber
size_t GetEntriesNumber()
Definition: RLazyDSImpl.hxx:86
ROOT::Internal::TDS::TTypedPointerHolder
Class to wrap a pointer and delete the memory associated to it correctly.
Definition: RDataSource.hxx:59
ROOT::RDF::RLazyDS::fColumns
std::tuple< RResultPtr< std::vector< ColumnTypes > >... > fColumns
Definition: RLazyDSImpl.hxx:44
ROOT::RDF::RLazyDS::Initialise
void Initialise()
Convenience method called before starting an event-loop.
Definition: RLazyDSImpl.hxx:182
ROOT::RDF::RLazyDS::GetColumnNames
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.
Definition: RLazyDSImpl.hxx:137
void
typedef void((*Func_t)())
ROOT::RDF::RLazyDS::fEntryRanges
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition: RLazyDSImpl.hxx:53
ULong64_t
unsigned long long ULong64_t
Definition: RtypesCore.h:74
ROOT::RDF::RDataSource::Record_t
std::vector< void * > Record_t
Definition: RDataSource.hxx:109
ROOT::RDF::RLazyDS::PointerHolderPtrs_t
std::vector< ROOT::Internal::TDS::TPointerHolder * > PointerHolderPtrs_t
Definition: RLazyDSImpl.hxx:42
ROOT::RDF::RLazyDS::SetEntryHelper
void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence< S... >)
Definition: RLazyDSImpl.hxx:88
ROOT::RDF::RLazyDS::GetTypeName
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g. "double".
Definition: RLazyDSImpl.hxx:145
ROOT::RDF::RLazyDS::GetLabel
std::string GetLabel()
Return a string representation of the datasource type.
Definition: RLazyDSImpl.hxx:203
RMakeUnique.hxx
ROOT::RDF::RLazyDS::GetColumnReadersImpl
Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
type-erased vector of pointers to pointers to column values - one per slot
Definition: RLazyDSImpl.hxx:56
ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
ROOT::Internal::RDF::TypeID2TypeName
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure.
Definition: RDFUtils.cxx:91
TGeant4Unit::second
static constexpr double second
Definition: TGeant4SystemOfUnits.h:151
ROOT::RDF::RLazyDS::~RLazyDS
~RLazyDS()
Definition: RLazyDSImpl.hxx:128
ROOT
VSD Structures.
Definition: StringConv.hxx:21
RResultPtr.hxx