Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RLazyDSImpl.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RLAZYDSIMPL
12#define ROOT_RLAZYDSIMPL
13
14#include "ROOT/RDataSource.hxx"
15#include "ROOT/RResultPtr.hxx"
16#include "ROOT/TSeq.hxx"
17
18#include <algorithm>
19#include <map>
20#include <memory>
21#include <tuple>
22#include <string>
23#include <typeinfo>
24#include <utility> // std::index_sequence
25#include <vector>
26
27namespace ROOT {
28
29namespace RDF {
30////////////////////////////////////////////////////////////////////////////////////////////////
31/// \brief A RDataSource implementation which is built on top of result proxies
32///
33/// This component allows creating a data source from a set of columns coming from
34/// one or multiple data frames. The processing of the parent data frames starts
35/// only when the event loop is triggered in the data frame initialized with a
36/// RLazyDS.
37///
38/// The implementation takes care of matching compile-time information with runtime
39/// information, e.g. by expanding the template parameter packs.
40template <typename... ColumnTypes>
// Collection of type-erased pointer holders.
42 using PointerHolderPtrs_t = std::vector<ROOT::Internal::TDS::TPointerHolder *>;
43
// One RResultPtr<std::vector<T>> per column type, in the order of the ColumnTypes pack.
44 std::tuple<RResultPtr<std::vector<ColumnTypes>>...> fColumns;
// Column names; the position of a name is used as the column index into fPointerHolders.
45 const std::vector<std::string> fColNames;
// Column name -> type name, as returned by GetTypeName.
46 const std::map<std::string, std::string> fColTypesMap;
47 // The role of the fPointerHoldersModels is to be initialized with the pack
48 // of arguments in the constructor signature at construction time
49 // Once the number of slots is known, the fPointerHolders are initialized
50 // according to the models.
// NOTE(review): the declaration of fPointerHoldersModels itself does not appear in this
// listing although the member is used by SetNSlots - confirm against the original header.
// Pointer holders indexed as [column][slot]; filled in SetNSlots.
52 std::vector<PointerHolderPtrs_t> fPointerHolders;
// Entry ranges computed by Initialize and handed out (and emptied) by GetEntryRanges.
53 std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
54
55 Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id) final
56 {
57 auto colNameStr = std::string(colName);
58 // This could be optimised and done statically
60 auto it = fColTypesMap.find(colNameStr);
61 if (fColTypesMap.end() == it) {
62 std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
63 throw std::runtime_error(err);
64 }
65
66 const auto colIdName = it->second;
67 if (colIdName != idName) {
68 std::string err = "Column " + colNameStr + " has type " + colIdName +
69 " while the id specified is associated to type " + idName;
70 throw std::runtime_error(err);
71 }
72
73 const auto colBegin = fColNames.begin();
74 const auto colEnd = fColNames.end();
75 const auto namesIt = std::find(colBegin, colEnd, colName);
76 const auto index = std::distance(colBegin, namesIt);
77
79 for (auto slot : ROOT::TSeqU(fNSlots)) {
80 ret[slot] = fPointerHolders[index][slot]->GetPointerAddr();
81 }
82 return ret;
83 }
84
85 size_t GetEntriesNumber() { return std::get<0>(fColumns)->size(); }
86 template <std::size_t... S>
87 void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
88 {
89 std::initializer_list<int> expander{
90 (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = (*std::get<S>(fColumns))[entry], 0)...};
91 (void)expander; // avoid unused variable warnings
92 }
93
94 template <std::size_t... S>
95 void ColLengthChecker(std::index_sequence<S...>)
96 {
97 if (sizeof...(S) < 2)
98 return;
99
100 const std::vector<size_t> colLengths{std::get<S>(fColumns)->size()...};
101 const auto expectedLen = colLengths[0];
102 std::string err;
103 for (auto i : TSeqI(1, colLengths.size())) {
104 if (expectedLen != colLengths[i]) {
105 err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
106 "\" have different lengths: " + std::to_string(expectedLen) + " and " +
107 std::to_string(colLengths[i]);
108 }
109 }
110 if (!err.empty()) {
111 throw std::runtime_error(err);
112 }
113 }
114
115protected:
116 std::string AsString() final { return "lazy data source"; };
117
118public:
119 RLazyDS(std::pair<std::string, RResultPtr<std::vector<ColumnTypes>>>... colsNameVals)
120 : fColumns(std::tuple<RResultPtr<std::vector<ColumnTypes>>...>(colsNameVals.second...)),
121 fColNames({colsNameVals.first...}),
124 {
125 }
126
127 // Rule of five
128 RLazyDS(const RLazyDS &) = delete;
129 RLazyDS &operator=(const RLazyDS &) = delete;
130 RLazyDS(RLazyDS &&) = delete;
131 RLazyDS &operator=(RLazyDS &&) = delete;
133 {
134 for (auto &&ptrHolderv : fPointerHolders) {
135 for (auto &&ptrHolder : ptrHolderv) {
136 delete ptrHolder;
137 }
138 }
139 }
140
141 const std::vector<std::string> &GetColumnNames() const final { return fColNames; }
142
143 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final
144 {
145 auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
146 return entryRanges;
147 }
148
149 std::string GetTypeName(std::string_view colName) const final
150 {
151 const auto key = std::string(colName);
152 return fColTypesMap.at(key);
153 }
154
155 bool HasColumn(std::string_view colName) const final
156 {
157 const auto key = std::string(colName);
158 const auto endIt = fColTypesMap.end();
159 return endIt != fColTypesMap.find(key);
160 }
161
162 bool SetEntry(unsigned int slot, ULong64_t entry) final
163 {
164 SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
165 return true;
166 }
167
168 void SetNSlots(unsigned int nSlots) final
169 {
170 fNSlots = nSlots;
171 const auto nCols = fColNames.size();
172 fPointerHolders.resize(nCols); // now we need to fill it with the slots, all of the same type
173 auto colIndex = 0U;
174 for (auto &&ptrHolderv : fPointerHolders) {
175 for (auto slot : ROOT::TSeqI(fNSlots)) {
176 auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
177 ptrHolderv.emplace_back(ptrHolder);
178 (void)slot;
179 }
180 colIndex++;
181 }
182 for (auto &&ptrHolder : fPointerHoldersModels)
183 delete ptrHolder;
184 }
185
187 {
188 ColLengthChecker(std::index_sequence_for<ColumnTypes...>());
189 const auto nEntries = GetEntriesNumber();
190 const auto nEntriesInRange = nEntries / fNSlots; // between integers. Should make smaller?
191 auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
192 fEntryRanges.resize(fNSlots);
193 auto init = 0ULL;
194 auto end = 0ULL;
195 for (auto &&range : fEntryRanges) {
196 end = init + nEntriesInRange;
197 if (0 != reminder) { // Distribute the reminder among the first chunks
198 reminder--;
199 end += 1;
200 }
201 range.first = init;
202 range.second = end;
203 init = end;
204 }
205 }
206
207 std::string GetLabel() final { return "LazyDS"; }
208};
209
210} // ns RDF
211
212} // ns ROOT
213
214#endif
unsigned long long ULong64_t
Definition RtypesCore.h:70
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Class to wrap a pointer and delete the memory associated to it correctly.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
A RDataSource implementation which is built on top of result proxies.
RLazyDS(RLazyDS &&)=delete
const std::vector< std::string > fColNames
std::vector< PointerHolderPtrs_t > fPointerHolders
RLazyDS(const RLazyDS &)=delete
void SetNSlots(unsigned int nSlots) final
Inform RDataSource of the number of processing slots (i.e.
std::string GetLabel() final
Return a string representation of the datasource type.
std::vector< ROOT::Internal::TDS::TPointerHolder * > PointerHolderPtrs_t
const std::vector< std::string > & GetColumnNames() const final
Returns a reference to the collection of the dataset's column names.
RLazyDS & operator=(RLazyDS &&)=delete
std::string GetTypeName(std::string_view colName) const final
Type of a column as a string, e.g.
void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence< S... >)
const PointerHolderPtrs_t fPointerHoldersModels
const std::map< std::string, std::string > fColTypesMap
void ColLengthChecker(std::index_sequence< S... >)
bool HasColumn(std::string_view colName) const final
Checks if the dataset has a certain column.
std::string AsString() final
void Initialize() final
Convenience method called before starting an event-loop.
RLazyDS & operator=(const RLazyDS &)=delete
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges() final
Return ranges of entries to distribute to tasks.
size_t GetEntriesNumber()
RLazyDS(std::pair< std::string, RResultPtr< std::vector< ColumnTypes > > >... colsNameVals)
Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id) final
type-erased vector of pointers to pointers to column values - one per slot
bool SetEntry(unsigned int slot, ULong64_t entry) final
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
std::tuple< RResultPtr< std::vector< ColumnTypes > >... > fColumns
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Smart pointer for the return type of actions.
const_iterator begin() const
const_iterator end() const
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:123
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
TSeq< int > TSeqI
Definition TSeq.hxx:203
TSeq< unsigned int > TSeqU
Definition TSeq.hxx:204