Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RChunkLoader.hxx
Go to the documentation of this file.
1// Author: Dante Niewenhuis, VU Amsterdam 07/2023
2// Author: Kristupas Pranckietis, Vilnius University 05/2024
3// Author: Nopphakorn Subsa-Ard, King Mongkut's University of Technology Thonburi (KMUTT) (TH) 08/2024
4// Author: Vincenzo Eduardo Padulano, CERN 10/2024
5
6/*************************************************************************
7 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef TMVA_RCHUNKLOADER
15#define TMVA_RCHUNKLOADER
16
17#include <vector>
18
19#include "TMVA/RTensor.hxx"
20#include "ROOT/RDataFrame.hxx"
21#include "ROOT/RDF/Utils.hxx"
22#include "ROOT/RVec.hxx"
23
24#include "ROOT/RLogger.hxx"
25
26namespace TMVA {
27namespace Experimental {
28namespace Internal {
29
30// RChunkLoaderFunctor: functor used to load the content of an RDataFrame into an RTensor.
31template <typename... ColTypes>
33 std::size_t fOffset{};
34 std::size_t fVecSizeIdx{};
35 float fVecPadding{};
36 std::vector<std::size_t> fMaxVecSizes{};
37
39
40 template <typename T, std::enable_if_t<ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
41 void AssignToTensor(const T &vec)
42 {
43 const auto &max_vec_size = fMaxVecSizes[fVecSizeIdx++];
44 const auto &vec_size = vec.size();
45 if (vec_size < max_vec_size) // Padding vector column to max_vec_size with fVecPadding
46 {
47 std::copy(vec.cbegin(), vec.cend(), &fChunkTensor.GetData()[fOffset]);
48 std::fill(&fChunkTensor.GetData()[fOffset + vec_size], &fChunkTensor.GetData()[fOffset + max_vec_size],
50 } else // Copy only max_vec_size length from vector column
51 {
52 std::copy(vec.cbegin(), vec.cbegin() + max_vec_size, &fChunkTensor.GetData()[fOffset]);
53 }
54 fOffset += max_vec_size;
55 }
56
57 template <typename T, std::enable_if_t<!ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
58 void AssignToTensor(const T &val)
59 {
60 fChunkTensor.GetData()[fOffset++] = val;
61 }
62
63public:
64 RChunkLoaderFunctor(TMVA::Experimental::RTensor<float> &chunkTensor, const std::vector<std::size_t> &maxVecSizes,
65 float vecPadding)
66 : fChunkTensor(chunkTensor), fMaxVecSizes(maxVecSizes), fVecPadding(vecPadding)
67 {
68 }
69
70 void operator()(const ColTypes &...cols)
71 {
72 fVecSizeIdx = 0;
73 (AssignToTensor(cols), ...);
74 }
75};
76
77template <typename... ColTypes>
79
80private:
81 std::size_t fOffset{};
82 std::size_t fVecSizeIdx{};
83 std::size_t fEntries{};
84 std::size_t fChunkSize{};
85 float fVecPadding{};
86 std::vector<std::size_t> fMaxVecSizes{};
87
90
91 template <typename T, std::enable_if_t<ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
92 void AssignToTensor(const T &vec)
93 {
94 std::size_t max_vec_size = fMaxVecSizes[fVecSizeIdx++];
95 std::size_t vec_size = vec.size();
96 if (vec_size < max_vec_size) // Padding vector column to max_vec_size with fVecPadding
97 {
98 std::copy(vec.begin(), vec.end(), &fChunkTensor.GetData()[fOffset]);
99 std::fill(&fChunkTensor.GetData()[fOffset + vec_size], &fChunkTensor.GetData()[fOffset + max_vec_size],
101 } else // Copy only max_vec_size length from vector column
102 {
103 std::copy(vec.begin(), vec.begin() + max_vec_size, &fChunkTensor.GetData()[fOffset]);
104 }
105 fOffset += max_vec_size;
106 fEntries++;
107 }
108
109 template <typename T, std::enable_if_t<!ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
110 void AssignToTensor(const T &val)
111 {
112 fChunkTensor.GetData()[fOffset++] = val;
113 fEntries++;
114 }
115
116public:
118 TMVA::Experimental::RTensor<float> &remainderTensor, std::size_t entries,
119 std::size_t chunkSize, std::size_t &&offset,
120 const std::vector<std::size_t> &maxVecSizes = std::vector<std::size_t>(),
121 const float vecPadding = 0.0)
122 : fChunkTensor(chunkTensor),
123 fRemainderTensor(remainderTensor),
124 fEntries(entries),
125 fChunkSize(chunkSize),
127 fMaxVecSizes(maxVecSizes),
128 fVecPadding(vecPadding)
129 {
130 }
131
132 void operator()(const ColTypes &...cols)
133 {
134 fVecSizeIdx = 0;
135 if (fEntries == fChunkSize) {
137 fOffset = 0;
138 }
139 (AssignToTensor(cols), ...);
140 }
141
142 std::size_t &SetEntries() { return fEntries; }
143 std::size_t &SetOffset() { return fOffset; }
144};
145
146template <typename... Args>
148
149private:
150 std::size_t fChunkSize;
151
152 std::vector<std::string> fCols;
153
154 std::vector<std::size_t> fVecSizes;
155 std::size_t fVecPadding;
156
159
160public:
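161 /// \brief Constructor for the RChunkLoader. It also takes chunkTensor, which is stored in fChunkTensor.
162 /// \param rdf RDataFrame node on which LoadChunk calls Count and Foreach
163 /// \param chunkSize Stored as fChunkSize (presumably the number of entries per chunk)
164 /// \param cols Names of the columns passed to Foreach
165 /// \param vecSizes Stored as fVecSizes (presumably the maximum size of each vector column)
166 /// \param vecPadding Stored in fVecPadding, a std::size_t member, so a fractional or negative float is not preserved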
167 RChunkLoader(ROOT::RDF::RNode &rdf, TMVA::Experimental::RTensor<float> &chunkTensor, const std::size_t chunkSize,
168 const std::vector<std::string> &cols, const std::vector<std::size_t> &vecSizes = {},
169 const float vecPadding = 0.0)
170 : f_rdf(rdf),
171 fChunkTensor(chunkTensor),
172 fChunkSize(chunkSize),
173 fCols(cols),
174 fVecSizes(vecSizes),
175 fVecPadding(vecPadding)
176 {
177 }
178
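179 /// \brief Load a chunk of data using the RChunkLoaderFunctor
180 /// (chunkTensor is not a parameter of this function; the tensor is supplied to the constructor)
181 /// \param currentRow
182 /// \return Number of processed events (the value of the Count result taken on f_rdf)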
183 std::size_t LoadChunk(const std::size_t currentRow)
184 {
186
188 auto myCount = f_rdf.Count();
189
190 // load data
191 f_rdf.Foreach(func, fCols);
192
193 // get loading info
194 return myCount.GetValue();
195 }
196};
197
198template <typename... Args>
200
201private:
204
205 std::size_t fChunkSize;
206 std::vector<std::string> fCols;
207 const std::size_t fNumEntries;
208 std::size_t fNumAllEntries;
209 std::vector<std::size_t> fVecSizes;
210 std::size_t fVecPadding;
211 std::size_t fNumColumns;
212
213 const std::size_t fPartOfChunkSize;
216
217public:
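218 /// \brief Constructor for the RChunkLoaderFilters
219 /// \param rdf,chunkTensor Stored as f_rdf and fChunkTensor
220 /// \param chunkSize Stored as fChunkSize; one fifth of it (integer division) becomes fPartOfChunkSize
221 /// \param cols Names of the columns passed to Foreach; their count becomes fNumColumns
222 /// \param numEntries,numAllEntries Upper bounds checked by the LoadChunk loop on passed events and on currentRow
223 /// \param vecSizes Stored as fVecSizes
224 /// \param vecPadding Stored in fVecPadding, a std::size_t member, so a fractional or negative float is not preserved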
226 const std::size_t chunkSize, const std::vector<std::string> &cols, std::size_t numEntries,
227 std::size_t numAllEntries, const std::vector<std::size_t> &vecSizes = {},
228 const float vecPadding = 0.0)
229 : f_rdf(rdf),
230 fChunkTensor(chunkTensor),
231 fChunkSize(chunkSize),
232 fCols(cols),
233 fNumEntries(numEntries),
234 fNumAllEntries(numAllEntries),
235 fVecSizes(vecSizes),
236 fVecPadding(vecPadding),
237 fNumColumns(cols.size()),
238 fPartOfChunkSize(chunkSize / 5),
239 fRemainderChunkTensor(std::vector<std::size_t>{fPartOfChunkSize, fNumColumns})
240 {
241 }
242
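243 /// \brief Load a chunk of data using the RChunkLoaderFunctor
244 /// (chunkTensor is not a parameter of this function; the tensor is supplied to the constructor)
245 /// \param currentRow Row position; advanced by fPartOfChunkSize per loop iteration and compared against fNumAllEntries
246 /// \return A pair of size_t defining the number of events processed and how many passed all filters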
247 std::pair<std::size_t, std::size_t> LoadChunk(std::size_t currentRow)
248 {
249 for (std::size_t i = 0; i < fRemainderChunkTensorRow; i++) {
250 std::copy(fRemainderChunkTensor.GetData() + (i * fNumColumns),
253 }
254
258
259 std::size_t passedEvents = 0;
260 std::size_t processedEvents = 0;
261
262 while ((passedEvents < fChunkSize && passedEvents < fNumEntries) && currentRow < fNumAllEntries) {
264 auto report = f_rdf.Report();
265
266 f_rdf.Foreach(func, fCols);
267
268 processedEvents += report.begin()->GetAll();
269 passedEvents += (report.end() - 1)->GetPass();
270
271 currentRow += fPartOfChunkSize;
272 func.SetEntries() = passedEvents;
273 func.SetOffset() = passedEvents * fNumColumns;
274 }
275
276 fRemainderChunkTensorRow = passedEvents > fChunkSize ? passedEvents - fChunkSize : 0;
277
278 return std::make_pair(processedEvents, passedEvents);
279 }
280
281 std::size_t LastChunk()
282 {
283 for (std::size_t i = 0; i < fRemainderChunkTensorRow; i++) {
284 std::copy(fRemainderChunkTensor.GetData() + (i * fNumColumns),
287 }
288
290 }
291};
292} // namespace Internal
293} // namespace Experimental
294} // namespace TMVA
295#endif // TMVA_RCHUNKLOADER
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
The public interface to the RDataFrame federation of classes.
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
std::pair< std::size_t, std::size_t > LoadChunk(std::size_t currentRow)
Load a chunk of data using the RChunkLoaderFunctor.
TMVA::Experimental::RTensor< float > & fChunkTensor
TMVA::Experimental::RTensor< float > fRemainderChunkTensor
RChunkLoaderFilters(ROOT::RDF::RNode &rdf, TMVA::Experimental::RTensor< float > &chunkTensor, const std::size_t chunkSize, const std::vector< std::string > &cols, std::size_t numEntries, std::size_t numAllEntries, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0)
Constructor for the RChunkLoaderFilters.
RChunkLoaderFunctorFilters(TMVA::Experimental::RTensor< float > &chunkTensor, TMVA::Experimental::RTensor< float > &remainderTensor, std::size_t entries, std::size_t chunkSize, std::size_t &&offset, const std::vector< std::size_t > &maxVecSizes=std::vector< std::size_t >(), const float vecPadding=0.0)
TMVA::Experimental::RTensor< float > & fChunkTensor
TMVA::Experimental::RTensor< float > & fRemainderTensor
RChunkLoaderFunctor(TMVA::Experimental::RTensor< float > &chunkTensor, const std::vector< std::size_t > &maxVecSizes, float vecPadding)
TMVA::Experimental::RTensor< float > & fChunkTensor
std::size_t LoadChunk(const std::size_t currentRow)
Load a chunk of data using the RChunkLoaderFunctor.
RChunkLoader(ROOT::RDF::RNode &rdf, TMVA::Experimental::RTensor< float > &chunkTensor, const std::size_t chunkSize, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0)
Constructor for the RChunkLoader.
TMVA::Experimental::RTensor< float > & fChunkTensor
RTensor is a container with contiguous memory and shape information.
Definition RTensor.hxx:162
void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end)
create variable transformations