Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RVecDS.hxx
Go to the documentation of this file.
1// Author: Stefan Wunsch CERN 04/2019
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include <ROOT/RDataFrame.hxx>
12#include <ROOT/RDataSource.hxx>
13#include <ROOT/RVec.hxx>
14#include <ROOT/TSeq.hxx>
15
16#include <algorithm>
17#include <functional>
18#include <map>
19#include <memory>
20#include <string>
21#include <tuple>
22#include <typeinfo>
23#include <utility>
24#include <vector>
25
26#ifndef ROOT_RVECDS
27#define ROOT_RVECDS
28
29namespace ROOT {
30
31namespace Internal {
32
33namespace RDF {
34
35////////////////////////////////////////////////////////////////////////////////////////////////
36/// \brief A RDataSource implementation which takes a collection of RVecs, which
37/// are able to adopt data from Numpy arrays
38///
39/// This component makes it possible to create a data source on a set of columns
40/// with data coming from RVecs. Because RVecs can adopt externally provided data,
41/// e.g., Numpy arrays, this makes it possible to read arbitrary data from memory.
42/// In addition, the data source has to keep a reference on the Python owned data
43/// so that the lifetime of the data is tied to the datasource.
44template <typename... ColumnTypes>
46 using PointerHolderPtrs_t = std::vector<ROOT::Internal::TDS::TPointerHolder *>;
47
48 std::tuple<ROOT::RVec<ColumnTypes>...> fColumns;
49 const std::vector<std::string> fColNames;
50 const std::map<std::string, std::string> fColTypesMap;
51 // The role of the fPointerHoldersModels is to be initialised with the pack
52 // of arguments in the constructor signature at construction time
53 // Once the number of slots is known, the fPointerHolders are initialised
54 // according to the models.
56 std::vector<PointerHolderPtrs_t> fPointerHolders;
57 std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
58 std::function<void()> fDeleteRVecs;
59
60 Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
61 {
62 auto colNameStr = std::string(colName);
63 // This could be optimised and done statically
65 auto it = fColTypesMap.find(colNameStr);
66 if (fColTypesMap.end() == it) {
67 std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
68 throw std::runtime_error(err);
69 }
70
71 const auto colIdName = it->second;
72 if (colIdName != idName) {
73 std::string err = "Column " + colNameStr + " has type " + colIdName +
74 " while the id specified is associated to type " + idName;
75 throw std::runtime_error(err);
76 }
77
78 const auto colBegin = fColNames.begin();
79 const auto colEnd = fColNames.end();
80 const auto namesIt = std::find(colBegin, colEnd, colName);
81 const auto index = std::distance(colBegin, namesIt);
82
84 for (auto slot : ROOT::TSeqU(fNSlots)) {
85 ret[slot] = fPointerHolders[index][slot]->GetPointerAddr();
86 }
87 return ret;
88 }
89
90 size_t GetEntriesNumber() { return std::get<0>(fColumns).size(); }
91 template <std::size_t... S>
92 void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
93 {
94 std::initializer_list<int> expander{
95 (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = std::get<S>(fColumns)[entry], 0)...};
96 (void)expander; // avoid unused variable warnings
97 }
98
99 template <std::size_t... S>
100 void ColLengthChecker(std::index_sequence<S...>)
101 {
102 if (sizeof...(S) < 2)
103 return;
104
105 const std::vector<size_t> colLengths{std::get<S>(fColumns).size()...};
106 const auto expectedLen = colLengths[0];
107 std::string err;
108 for (auto i : TSeqI(1, colLengths.size())) {
109 if (expectedLen != colLengths[i]) {
110 err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
111 "\" have different lengths: " + std::to_string(expectedLen) + " and " +
112 std::to_string(colLengths[i]);
113 }
114 }
115 if (!err.empty()) {
116 throw std::runtime_error(err);
117 }
118 }
119
120protected:
/// \brief Short textual description of this data source.
std::string AsString()
{
   return std::string("Numpy data source");
}
122
123public:
132
133 // Rule of five
134 RVecDS(const RVecDS &) = delete;
135 RVecDS &operator=(const RVecDS &) = delete;
136 RVecDS(RVecDS &&) = delete;
137 RVecDS &operator=(RVecDS &&) = delete;
139 {
140 for (auto &&ptrHolderv : fPointerHolders) {
141 for (auto &&ptrHolder : ptrHolderv) {
142 delete ptrHolder;
143 }
144 }
145 // Release the data associated to this data source
146 fDeleteRVecs();
147 }
148
149 const std::vector<std::string> &GetColumnNames() const { return fColNames; }
150
151 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges()
152 {
153 auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
154 return entryRanges;
155 }
156
157 std::string GetTypeName(std::string_view colName) const
158 {
159 const auto key = std::string(colName);
160 return fColTypesMap.at(key);
161 }
162
163 bool HasColumn(std::string_view colName) const
164 {
165 const auto key = std::string(colName);
166 const auto endIt = fColTypesMap.end();
167 return endIt != fColTypesMap.find(key);
168 }
169
170 bool SetEntry(unsigned int slot, ULong64_t entry)
171 {
172 SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
173 return true;
174 }
175
176 void SetNSlots(unsigned int nSlots)
177 {
178 fNSlots = nSlots;
179 const auto nCols = fColNames.size();
180 fPointerHolders.resize(nCols); // now we need to fill it with the slots, all of the same type
181 auto colIndex = 0U;
182 for (auto &&ptrHolderv : fPointerHolders) {
183 for (auto slot : ROOT::TSeqI(fNSlots)) {
184 auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
185 ptrHolderv.emplace_back(ptrHolder);
186 (void)slot;
187 }
188 colIndex++;
189 }
190 for (auto &&ptrHolder : fPointerHoldersModels)
191 delete ptrHolder;
192 }
193
195 {
196 ColLengthChecker(std::index_sequence_for<ColumnTypes...>());
197 const auto nEntries = GetEntriesNumber();
198 const auto nEntriesInRange = nEntries / fNSlots; // between integers. Should make smaller?
199 auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
200 fEntryRanges.resize(fNSlots);
201 auto init = 0ULL;
202 auto end = 0ULL;
203 for (auto &&range : fEntryRanges) {
204 end = init + nEntriesInRange;
205 if (0 != reminder) { // Distribute the reminder among the first chunks
206 reminder--;
207 end += 1;
208 }
209 range.first = init;
210 range.second = end;
211 init = end;
212 }
213 }
214
/// \brief Label identifying the type of this data source.
/// \return The string "RVecDS".
std::string GetLabel()
{
   return std::string("RVecDS");
}
216};
217
218// Factory to create datasource able to read Numpy arrays through RVecs.
219// \param pyRVecs Pointer to PyObject holding RVecs.
220// The RVecs itself hold a reference to the associated Numpy arrays so that
221// the data cannot go out of scope as long as the datasource survives.
222template <typename... ColumnTypes>
223std::unique_ptr<RDataFrame>
224MakeRVecDataFrame(std::function<void()> deleteRVecs,
225 std::pair<std::string, ROOT::RVec<ColumnTypes>> const &...colNameProxyPairs)
226{
227 return std::make_unique<RDataFrame>(std::make_unique<RVecDS<ColumnTypes...>>(deleteRVecs, colNameProxyPairs...));
228}
229
230} // namespace RDF
231} // namespace Internal
232} // namespace ROOT
233
234#endif // ROOT_RVECDS
unsigned long long ULong64_t
Definition RtypesCore.h:70
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
A RDataSource implementation which takes a collection of RVecs, which are able to adopt data from Num...
Definition RVecDS.hxx:45
RVecDS(RVecDS &&)=delete
Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
type-erased vector of pointers to pointers to column values - one per slot
Definition RVecDS.hxx:60
std::vector< PointerHolderPtrs_t > fPointerHolders
Definition RVecDS.hxx:56
void Initialize()
Convenience method called before starting an event-loop.
Definition RVecDS.hxx:194
RVecDS & operator=(const RVecDS &)=delete
std::string GetTypeName(std::string_view colName) const
Type of a column as a string, e.g.
Definition RVecDS.hxx:157
const PointerHolderPtrs_t fPointerHoldersModels
Definition RVecDS.hxx:55
const std::vector< std::string > & GetColumnNames() const
Returns a reference to the collection of the dataset's column names.
Definition RVecDS.hxx:149
bool SetEntry(unsigned int slot, ULong64_t entry)
Advance the "cursors" returned by GetColumnReaders to the selected entry for a particular slot.
Definition RVecDS.hxx:170
std::function< void()> fDeleteRVecs
Definition RVecDS.hxx:58
void SetNSlots(unsigned int nSlots)
Inform RDataSource of the number of processing slots (i.e.
Definition RVecDS.hxx:176
std::vector< ROOT::Internal::TDS::TPointerHolder * > PointerHolderPtrs_t
Definition RVecDS.hxx:46
std::tuple< ROOT::RVec< ColumnTypes >... > fColumns
Definition RVecDS.hxx:48
std::string GetLabel()
Return a string representation of the datasource type.
Definition RVecDS.hxx:215
RVecDS & operator=(RVecDS &&)=delete
RVecDS(const RVecDS &)=delete
const std::vector< std::string > fColNames
Definition RVecDS.hxx:49
std::vector< std::pair< ULong64_t, ULong64_t > > fEntryRanges
Definition RVecDS.hxx:57
void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence< S... >)
Definition RVecDS.hxx:92
void ColLengthChecker(std::index_sequence< S... >)
Definition RVecDS.hxx:100
RVecDS(std::function< void()> deleteRVecs, std::pair< std::string, ROOT::RVec< ColumnTypes > > const &...colsNameVals)
Definition RVecDS.hxx:124
bool HasColumn(std::string_view colName) const
Checks if the dataset has a certain column.
Definition RVecDS.hxx:163
std::vector< std::pair< ULong64_t, ULong64_t > > GetEntryRanges()
Return ranges of entries to distribute to tasks.
Definition RVecDS.hxx:151
const std::map< std::string, std::string > fColTypesMap
Definition RVecDS.hxx:50
Class to wrap a pointer and delete the memory associated to it correctly.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::vector< void * > Record_t
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1529
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:119
std::unique_ptr< RDataFrame > MakeRVecDataFrame(std::function< void()> deleteRVecs, std::pair< std::string, ROOT::RVec< ColumnTypes > > const &...colNameProxyPairs)
Definition RVecDS.hxx:224
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
TSeq< int > TSeqI
Definition TSeq.hxx:203
TSeq< unsigned int > TSeqU
Definition TSeq.hxx:204