Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RLoopManager.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RLOOPMANAGER
12#define ROOT_RLOOPMANAGER
13
18
19#include <functional>
20#include <limits>
21#include <map>
22#include <memory>
23#include <string>
24#include <unordered_map>
25#include <vector>
26
27// forward declarations
28class TTree;
29class TTreeReader;
30class TDirectory;
31
32namespace ROOT {
33namespace RDF {
34class RCutFlowReport;
35class RDataSource;
36} // ns RDF
37
38namespace Internal {
39namespace RDF {
40std::vector<std::string> GetBranchNames(TTree &t, bool allowDuplicates = true);
41
42class GraphNode;
43class RActionBase;
44class RVariationBase;
45
46namespace GraphDrawing {
48} // ns GraphDrawing
49
50using Callback_t = std::function<void(unsigned int)>;
51
52class RCallback {
55 std::vector<ULong64_t> fCounters;
56
57public:
58 RCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
59 : fFun(std::move(f)), fEveryN(everyN), fCounters(nSlots, 0ull)
60 {
61 }
62
63 void operator()(unsigned int slot)
64 {
65 auto &c = fCounters[slot];
66 ++c;
67 if (c == fEveryN) {
68 c = 0ull;
69 fFun(slot);
70 }
71 }
72};
73
76 std::vector<int> fHasBeenCalled; // std::vector<bool> is thread-unsafe for our purposes (and generally evil)
77
78public:
79 ROneTimeCallback(Callback_t &&f, unsigned int nSlots) : fFun(std::move(f)), fHasBeenCalled(nSlots, 0) {}
80
81 void operator()(unsigned int slot)
82 {
83 if (fHasBeenCalled[slot] == 1)
84 return;
85 fFun(slot);
86 fHasBeenCalled[slot] = 1;
87 }
88};
89
90} // ns RDF
91} // ns Internal
92
93namespace Detail {
94namespace RDF {
96
97class RFilterBase;
98class RRangeBase;
99class RDefineBase;
101
102/// The head node of a RDF computation graph.
103/// This class is responsible for running the event loop.
104class RLoopManager : public RNodeBase {
105 using ColumnNames_t = std::vector<std::string>;
107
108 friend struct RCallCleanUpTask;
109
110 std::vector<RDFInternal::RActionBase *> fBookedActions; ///< Non-owning pointers to actions to be run
111 std::vector<RDFInternal::RActionBase *> fRunActions; ///< Non-owning pointers to actions already run
112 std::vector<RFilterBase *> fBookedFilters;
113 std::vector<RFilterBase *> fBookedNamedFilters; ///< Contains a subset of fBookedFilters, i.e. only the named filters
114 std::vector<RRangeBase *> fBookedRanges;
115 std::vector<RDefineBase *> fBookedDefines;
116 std::vector<RDFInternal::RVariationBase *> fBookedVariations;
117
118 /// Shared pointer to the input TTree. It does not delete the pointee if the TTree/TChain was passed directly as an
119 /// argument to RDataFrame's ctor (in which case we let users retain ownership).
120 std::shared_ptr<TTree> fTree{nullptr};
122 Long64_t fEndEntry{std::numeric_limits<Long64_t>::max()};
123 std::vector<std::unique_ptr<TTree>> fFriends; ///< Friends of the fTree. Only used if we constructed fTree ourselves.
126 const unsigned int fNSlots{1};
128 const ELoopType fLoopType; ///< The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
129 const std::unique_ptr<RDataSource> fDataSource; ///< Owning pointer to a data-source object. Null if no data-source
130 std::vector<RDFInternal::RCallback> fCallbacks; ///< Registered callbacks
131 /// Registered callbacks to invoke just once before running the loop
132 std::vector<RDFInternal::ROneTimeCallback> fCallbacksOnce;
133 /// Registered callbacks to call at the beginning of each "data block".
134 /// The key is the pointer of the corresponding node in the computation graph (a RDefinePerSample or a RAction).
135 std::unordered_map<void *, ROOT::RDF::SampleCallback_t> fSampleCallbacks;
137 std::vector<ROOT::RDF::RSampleInfo> fSampleInfos;
138 unsigned int fNRuns{0}; ///< Number of event loops run
139
140 /// Registry of per-slot value pointers for booked data-source columns
141 std::map<std::string, std::vector<void *>> fDSValuePtrMap;
142
143 /// Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
145
147 void RunEmptySourceMT();
148 void RunEmptySource();
149 void RunTreeProcessorMT();
150 void RunTreeReader();
151 void RunDataSourceMT();
152 void RunDataSource();
153 void RunAndCheckFilters(unsigned int slot, Long64_t entry);
154 void InitNodeSlots(TTreeReader *r, unsigned int slot);
155 void InitNodes();
156 void CleanUpNodes();
157 void CleanUpTask(TTreeReader *r, unsigned int slot);
158 void EvalChildrenCounts();
159 void SetupSampleCallbacks(TTreeReader *r, unsigned int slot);
160 void UpdateSampleInfo(unsigned int slot, const std::pair<ULong64_t, ULong64_t> &range);
161 void UpdateSampleInfo(unsigned int slot, TTreeReader &r);
162
163public:
164 RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches);
165 RLoopManager(ULong64_t nEmptyEntries);
166 RLoopManager(std::unique_ptr<RDataSource> ds, const ColumnNames_t &defaultBranches);
167 /// \cond HIDDEN_SYMBOLS
168 RLoopManager(ROOT::Internal::RDF::RDatasetSpec &&spec);
169 /// \endcond
170 RLoopManager(const RLoopManager &) = delete;
172
174 void Jit();
175 RLoopManager *GetLoopManagerUnchecked() final { return this; }
176 void Run();
178 TTree *GetTree() const;
181 RDataSource *GetDataSource() const { return fDataSource.get(); }
182 void Book(RDFInternal::RActionBase *actionPtr);
183 void Deregister(RDFInternal::RActionBase *actionPtr);
184 void Book(RFilterBase *filterPtr);
185 void Deregister(RFilterBase *filterPtr);
186 void Book(RRangeBase *rangePtr);
187 void Deregister(RRangeBase *rangePtr);
188 void Book(RDefineBase *definePtr);
189 void Deregister(RDefineBase *definePtr);
190 void Book(RDFInternal::RVariationBase *varPtr);
192 bool CheckFilters(unsigned int, Long64_t) final;
193 unsigned int GetNSlots() const { return fNSlots; }
194 void Report(ROOT::RDF::RCutFlowReport &rep) const final;
195 /// End of recursive chain of calls, does nothing
197 void SetTree(const std::shared_ptr<TTree> &tree) { fTree = tree; }
198 void IncrChildrenCount() final { ++fNChildren; }
199 void StopProcessing() final { ++fNStopsReceived; }
200 void ToJitExec(const std::string &) const;
201 void RegisterCallback(ULong64_t everyNEvents, std::function<void(unsigned int)> &&f);
202 unsigned int GetNRuns() const { return fNRuns; }
203 bool HasDSValuePtrs(const std::string &col) const;
204 const std::map<std::string, std::vector<void *>> &GetDSValuePtrs() const { return fDSValuePtrMap; }
205 void AddDSValuePtrs(const std::string &col, const std::vector<void *> ptrs);
206
207 /// End of recursive chain of calls, does nothing
208 void AddFilterName(std::vector<std::string> &) {}
209 /// For each booked filter, returns either the name or "Unnamed Filter"
210 std::vector<std::string> GetFiltersNames();
211
212 /// Return all graph edges known to RLoopManager.
213 /// This includes Filters and Ranges but not Defines.
214 std::vector<RNodeBase *> GetGraphEdges() const;
215
216 /// Return all actions, either booked or already run
217 std::vector<RDFInternal::RActionBase *> GetAllActions() const;
218
219 std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode> GetGraph();
220
222
223 void AddSampleCallback(void *nodePtr, ROOT::RDF::SampleCallback_t &&callback);
224};
225
226} // ns RDF
227} // ns Detail
228} // ns ROOT
229
230#endif
typedef void(GLAPIENTRYP _GLUfuncptr)(void)
ROOT::R::TRInterface & r
Definition Object.C:4
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
long long Long64_t
Definition RtypesCore.h:80
unsigned long long ULong64_t
Definition RtypesCore.h:81
The head node of a RDF computation graph.
void UpdateSampleInfo(unsigned int slot, const std::pair< ULong64_t, ULong64_t > &range)
unsigned int fNRuns
Number of event loops run.
bool CheckFilters(unsigned int, Long64_t) final
void EvalChildrenCounts()
Trigger counting of the number of child nodes for each node of the functional graph.
void CleanUpNodes()
Perform clean-up operations. To be called at the end of each event loop.
void RunEmptySource()
Run event loop with no source files, in sequence.
void Report(ROOT::RDF::RCutFlowReport &rep) const final
Call FillReport on all booked filters.
void AddSampleCallback(void *nodePtr, ROOT::RDF::SampleCallback_t &&callback)
std::vector< RFilterBase * > fBookedNamedFilters
Contains a subset of fBookedFilters, i.e. only the named filters.
void RunEmptySourceMT()
Run event loop with no source files, in parallel.
RLoopManager & operator=(const RLoopManager &)=delete
void AddDSValuePtrs(const std::string &col, const std::vector< void * > ptrs)
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
std::vector< RDFInternal::RActionBase * > GetAllActions() const
Return all actions, either booked or already run.
std::vector< ROOT::RDF::RSampleInfo > fSampleInfos
::TDirectory * GetDirectory() const
std::shared_ptr< TTree > fTree
Shared pointer to the input TTree.
std::vector< std::unique_ptr< TTree > > fFriends
Friends of the fTree. Only used if we constructed fTree ourselves.
std::vector< RDefineBase * > fBookedDefines
void RunTreeReader()
Run event loop over one or multiple ROOT files, in sequence.
std::vector< RDFInternal::RActionBase * > fRunActions
Non-owning pointers to actions already run.
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void AddFilterName(std::vector< std::string > &)
End of recursive chain of calls, does nothing.
std::vector< RRangeBase * > fBookedRanges
std::vector< std::string > ColumnNames_t
void RunAndCheckFilters(unsigned int slot, Long64_t entry)
Execute actions and make sure named filters are called for each event.
std::vector< RFilterBase * > fBookedFilters
std::unordered_map< void *, ROOT::RDF::SampleCallback_t > fSampleCallbacks
Registered callbacks to call at the beginning of each "data block".
std::vector< RDFInternal::RActionBase * > fBookedActions
Non-owning pointers to actions to be run.
std::vector< RDFInternal::RCallback > fCallbacks
Registered callbacks.
std::shared_ptr< ROOT::Internal::RDF::GraphDrawing::GraphNode > GetGraph()
const ELoopType fLoopType
The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
void SetupSampleCallbacks(TTreeReader *r, unsigned int slot)
ColumnNames_t fValidBranchNames
Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
void CleanUpTask(TTreeReader *r, unsigned int slot)
Perform clean-up operations. To be called at the end of each task execution.
const std::map< std::string, std::vector< void * > > & GetDSValuePtrs() const
std::map< std::string, std::vector< void * > > fDSValuePtrMap
Registry of per-slot value pointers for booked data-source columns.
void SetTree(const std::shared_ptr< TTree > &tree)
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
std::vector< RDFInternal::RVariationBase * > fBookedVariations
std::vector< RNodeBase * > GetGraphEdges() const
Return all graph edges known to RLoopManager. This includes Filters and Ranges but not Defines.
RDataSource * GetDataSource() const
void RunDataSourceMT()
Run event loop over data accessed through a DataSource, in parallel.
void PartialReport(ROOT::RDF::RCutFlowReport &) const final
End of recursive chain of calls, does nothing.
bool HasDSValuePtrs(const std::string &col) const
std::vector< std::string > GetFiltersNames()
For each booked filter, returns either the name or "Unnamed Filter".
RLoopManager(const RLoopManager &)=delete
const std::unique_ptr< RDataSource > fDataSource
Owning pointer to a data-source object. Null if no data-source.
RDFInternal::RNewSampleNotifier fNewSampleNotifier
const ColumnNames_t fDefaultColumns
void Book(RDFInternal::RActionBase *actionPtr)
void InitNodeSlots(TTreeReader *r, unsigned int slot)
Build TTreeReaderValues for all nodes. This method loops over all filters, actions and other booked ob...
std::vector< RDFInternal::ROneTimeCallback > fCallbacksOnce
Registered callbacks to invoke just once before running the loop.
void RegisterCallback(ULong64_t everyNEvents, std::function< void(unsigned int)> &&f)
void RunDataSource()
Run event loop over data accessed through a DataSource, in sequence.
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
void RunTreeProcessorMT()
Run event loop over one or multiple ROOT files, in parallel.
void Deregister(RDFInternal::RActionBase *actionPtr)
void InitNodes()
Initialize all nodes of the functional graph before running the event loop.
RLoopManager * GetLoopManagerUnchecked() final
Base class for non-leaf nodes of the computational graph.
Definition RNodeBase.hxx:42
unsigned int fNStopsReceived
Number of times that a child node signaled to stop processing entries.
Definition RNodeBase.hxx:46
unsigned int fNChildren
Number of nodes of the functional graph hanging from this object.
Definition RNodeBase.hxx:45
Helper class that provides the operation graph nodes.
Class used to create the operation graph to be printed in the dot representation.
RCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
std::vector< ULong64_t > fCounters
void operator()(unsigned int slot)
ROneTimeCallback(Callback_t &&f, unsigned int nSlots)
This type includes all parts of RVariation that do not depend on the callable signature.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Describe directory structure in memory.
Definition TDirectory.h:45
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:44
A TTree represents a columnar dataset.
Definition TTree.h:79
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
std::function< void(unsigned int)> Callback_t
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with a RDataFrame computation graph via e....
tbb::task_arena is an alias of tbb::interface7::task_arena, which does not allow forward declaring tb...
Definition tree.py:1
A RAII object that calls RLoopManager::CleanUpTask at destruction.