Logo ROOT  
Reference Guide
RLoopManager.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RLOOPMANAGER
12#define ROOT_RLOOPMANAGER
13
17
18#include <functional>
19#include <map>
20#include <memory>
21#include <string>
22#include <vector>
23
24// forward declarations
25class TTree;
26class TTreeReader;
27class TDirectory;
28
29namespace ROOT {
30namespace RDF {
31class RCutFlowReport;
32class RDataSource;
33} // ns RDF
34
35namespace Internal {
36namespace RDF {
/// Get all the branch names of tree `t`, including the ones of its friend trees.
37std::vector<std::string> GetBranchNames(TTree &t, bool allowDuplicates = true);
38
39class GraphNode;
40class RActionBase;
41class RVariationBase;
42
43namespace GraphDrawing {
45} // ns GraphDrawing
46
47using Callback_t = std::function<void(unsigned int)>;
48
/// Callable wrapper that invokes a user-provided function once every `fEveryN` calls,
/// with calls counted independently for each slot.
/// NOTE(review): the declarations of `fFun` and `fEveryN` (both used below) are not visible in this listing.
49class RCallback {
52 std::vector<ULong64_t> fCounters; ///< One call counter per slot (sized to `nSlots` by the constructor)
53
54public:
/// Store the callback (moved from `f`) and the period `everyN`, and create `nSlots` counters set to zero.
55 RCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
56 : fFun(std::move(f)), fEveryN(everyN), fCounters(nSlots, 0ull)
57 {
58 }
59
/// Count one call for `slot`; when that slot's counter reaches `fEveryN`, reset it to zero and invoke the callback
/// with `slot` as argument.
60 void operator()(unsigned int slot)
61 {
62 auto &c = fCounters[slot]; // unchecked indexing: `slot` must be smaller than the `nSlots` given at construction
63 ++c;
64 if (c == fEveryN) { // the counter is incremented before the comparison, so it is at least 1 here (barring wrap-around)
65 c = 0ull;
66 fFun(slot);
67 }
68 }
69};
70
73 std::vector<int> fHasBeenCalled; // std::vector<bool> is thread-unsafe for our purposes (and generally evil)
74
75public:
/// Store the callback (moved from `f`) and create one "has been called" flag per slot, all initialised to 0.
76 ROneTimeCallback(Callback_t &&f, unsigned int nSlots) : fFun(std::move(f)), fHasBeenCalled(nSlots, 0) {}
77
/// Invoke the callback with `slot` the first time this operator is called for that slot;
/// subsequent calls for the same slot return immediately.
78 void operator()(unsigned int slot)
79 {
80 if (fHasBeenCalled[slot] == 1) // this slot has already run the callback
81 return;
82 fFun(slot);
83 fHasBeenCalled[slot] = 1; // the flag is set only after the callback has returned
84 }
85};
86
87} // ns RDF
88} // ns Internal
89
90namespace Detail {
91namespace RDF {
93
94class RFilterBase;
95class RRangeBase;
96class RDefineBase;
98
99/// The head node of a RDF computation graph.
100/// This class is responsible for running the event loop.
101class RLoopManager : public RNodeBase {
102 using ColumnNames_t = std::vector<std::string>;
    /// The kinds of event loop that can be run: over ROOT files, over no files, or over a data source,
    /// each with a corresponding "MT" enumerator.
103 enum class ELoopType { kROOTFiles, kROOTFilesMT, kNoFiles, kNoFilesMT, kDataSource, kDataSourceMT };
104
105 friend struct RCallCleanUpTask;
106
107 std::vector<RDFInternal::RActionBase *> fBookedActions; ///< Non-owning pointers to actions to be run
108 std::vector<RDFInternal::RActionBase *> fRunActions; ///< Non-owning pointers to actions already run
109 std::vector<RFilterBase *> fBookedFilters;
110 std::vector<RFilterBase *> fBookedNamedFilters; ///< Contains a subset of fBookedFilters, i.e. only the named filters
111 std::vector<RRangeBase *> fBookedRanges;
112 std::vector<RDefineBase *> fBookedDefines;
113 std::vector<RDFInternal::RVariationBase *> fBookedVariations;
114
115 /// Shared pointer to the input TTree. It does not delete the pointee if the TTree/TChain was passed directly as an
116 /// argument to RDataFrame's ctor (in which case we let users retain ownership).
117 std::shared_ptr<TTree> fTree{nullptr};
120 const unsigned int fNSlots{1};
122 const ELoopType fLoopType; ///< The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
123 const std::unique_ptr<RDataSource> fDataSource; ///< Owning pointer to a data-source object. Null if no data-source
124 std::vector<RDFInternal::RCallback> fCallbacks; ///< Registered callbacks
125 /// Registered callbacks to invoke just once before running the loop
126 std::vector<RDFInternal::ROneTimeCallback> fCallbacksOnce;
127 /// Registered callbacks to call at the beginning of each "data block"
128 std::vector<ROOT::RDF::SampleCallback_t> fSampleCallbacks;
130 std::vector<ROOT::RDF::RSampleInfo> fSampleInfos;
131 unsigned int fNRuns{0}; ///< Number of event loops run
132
133 /// Registry of per-slot value pointers for booked data-source columns
134 std::map<std::string, std::vector<void *>> fDSValuePtrMap;
135
136 /// Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
    /// NOTE(review): the member documented by the comment above (`fValidBranchNames` according to the member
    /// index of this page) is not visible in this listing.
138
    /// Run event loop with no source files, in parallel.
140 void RunEmptySourceMT();
    /// Run event loop with no source files, in sequence.
141 void RunEmptySource();
    /// Run event loop over one or multiple ROOT files, in parallel.
142 void RunTreeProcessorMT();
    /// Run event loop over one or multiple ROOT files, in sequence.
143 void RunTreeReader();
    /// Run event loop over data accessed through a DataSource, in parallel.
144 void RunDataSourceMT();
    /// Run event loop over data accessed through a DataSource, in sequence.
145 void RunDataSource();
    /// Execute actions and make sure named filters are called for each event.
146 void RunAndCheckFilters(unsigned int slot, Long64_t entry);
    /// Build TTreeReaderValues for all nodes.
147 void InitNodeSlots(TTreeReader *r, unsigned int slot);
    /// Initialize all nodes of the functional graph before running the event loop.
148 void InitNodes();
    /// Perform clean-up operations. To be called at the end of each event loop.
149 void CleanUpNodes();
    /// Perform clean-up operations. To be called at the end of each task execution.
150 void CleanUpTask(TTreeReader *r, unsigned int slot);
    /// Trigger counting of the number of child nodes for each node of the functional graph.
151 void EvalChildrenCounts();
152 void SetupSampleCallbacks(TTreeReader *r, unsigned int slot);
153 void UpdateSampleInfo(unsigned int slot, const std::pair<ULong64_t, ULong64_t> &range);
154 void UpdateSampleInfo(unsigned int slot, TTreeReader &r);
155
156public:
157 RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches);
158 RLoopManager(ULong64_t nEmptyEntries);
159 RLoopManager(std::unique_ptr<RDataSource> ds, const ColumnNames_t &defaultBranches);
160 RLoopManager(const RLoopManager &) = delete;
162
    /// Add RDF nodes that require just-in-time compilation to the computation graph.
164 void Jit();
    /// Return a pointer to this object.
165 RLoopManager *GetLoopManagerUnchecked() final { return this; }
    /// Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
166 void Run();
168 TTree *GetTree() const;
    /// Return a non-owning pointer to the data source (null if there is no data source).
171 RDataSource *GetDataSource() const { return fDataSource.get(); }
172 void Book(RDFInternal::RActionBase *actionPtr);
173 void Deregister(RDFInternal::RActionBase *actionPtr);
174 void Book(RFilterBase *filterPtr);
175 void Deregister(RFilterBase *filterPtr);
176 void Book(RRangeBase *rangePtr);
177 void Deregister(RRangeBase *rangePtr);
178 void Book(RDefineBase *definePtr);
179 void Deregister(RDefineBase *definePtr);
180 void Book(RDFInternal::RVariationBase *varPtr);
182 bool CheckFilters(unsigned int, Long64_t) final;
183 unsigned int GetNSlots() const { return fNSlots; }
    /// Call FillReport on all booked filters.
184 void Report(ROOT::RDF::RCutFlowReport &rep) const final;
185 /// End of recursive chain of calls, does nothing
    /// NOTE(review): the declaration documented by the comment above (`PartialReport` according to the member
    /// index of this page) is not visible in this listing; the comment does not describe SetTree.
    /// Replace the stored input tree with `tree`.
187 void SetTree(const std::shared_ptr<TTree> &tree) { fTree = tree; }
188 void IncrChildrenCount() final { ++fNChildren; } // bumps the child-node counter
189 void StopProcessing() final { ++fNStopsReceived; } // records one more "stop processing" signal
190 void ToJitExec(const std::string &) const;
191 void RegisterCallback(ULong64_t everyNEvents, std::function<void(unsigned int)> &&f);
192 unsigned int GetNRuns() const { return fNRuns; }
193 bool HasDSValuePtrs(const std::string &col) const;
194 const std::map<std::string, std::vector<void *>> &GetDSValuePtrs() const { return fDSValuePtrMap; }
    /// NOTE(review): `ptrs` is taken by const value, so callers passing an lvalue pay for a copy of the vector;
    /// confirm whether a const reference was intended.
195 void AddDSValuePtrs(const std::string &col, const std::vector<void *> ptrs);
196
197 /// End of recursive chain of calls, does nothing
198 void AddFilterName(std::vector<std::string> &) {}
199 /// For each booked filter, returns either the name or "Unnamed Filter"
200 std::vector<std::string> GetFiltersNames();
201
202 /// Return all graph edges known to RLoopManager
203 /// This includes Filters and Ranges but not Defines.
204 std::vector<RNodeBase *> GetGraphEdges() const;
205
206 /// Return all actions, either booked or already run
207 std::vector<RDFInternal::RActionBase *> GetAllActions() const;
208
209 std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode>
210 GetGraph(std::unordered_map<void *, std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode>> &visitedMap);
211
213
215};
216
217} // ns RDF
218} // ns Detail
219} // ns ROOT
220
221#endif
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
long long Long64_t
Definition: RtypesCore.h:80
unsigned long long ULong64_t
Definition: RtypesCore.h:81
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
The head node of a RDF computation graph.
void UpdateSampleInfo(unsigned int slot, const std::pair< ULong64_t, ULong64_t > &range)
RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches)
unsigned int fNRuns
Number of event loops run.
bool CheckFilters(unsigned int, Long64_t) final
void EvalChildrenCounts()
Trigger counting of the number of child nodes for each node of the functional graph.
void CleanUpNodes()
Perform clean-up operations. To be called at the end of each event loop.
void RunEmptySource()
Run event loop with no source files, in sequence.
void Report(ROOT::RDF::RCutFlowReport &rep) const final
Call FillReport on all booked filters.
std::vector< RFilterBase * > fBookedNamedFilters
Contains a subset of fBookedFilters, i.e. only the named filters.
void RunEmptySourceMT()
Run event loop with no source files, in parallel.
ULong64_t GetNEmptyEntries() const
RLoopManager & operator=(const RLoopManager &)=delete
void AddDSValuePtrs(const std::string &col, const std::vector< void * > ptrs)
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void AddSampleCallback(ROOT::RDF::SampleCallback_t &&callback)
unsigned int GetNRuns() const
std::vector< RDFInternal::RActionBase * > GetAllActions() const
Return all actions, either booked or already run.
std::vector< ROOT::RDF::RSampleInfo > fSampleInfos
::TDirectory * GetDirectory() const
std::shared_ptr< TTree > fTree
Shared pointer to the input TTree.
std::vector< RDefineBase * > fBookedDefines
void RunTreeReader()
Run event loop over one or multiple ROOT files, in sequence.
std::vector< RDFInternal::RActionBase * > fRunActions
Non-owning pointers to actions already run.
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void AddFilterName(std::vector< std::string > &)
End of recursive chain of calls, does nothing.
std::vector< RRangeBase * > fBookedRanges
std::vector< ROOT::RDF::SampleCallback_t > fSampleCallbacks
Registered callbacks to call at the beginning of each "data block".
std::vector< std::string > ColumnNames_t
void RunAndCheckFilters(unsigned int slot, Long64_t entry)
Execute actions and make sure named filters are called for each event.
std::vector< RFilterBase * > fBookedFilters
std::vector< RDFInternal::RActionBase * > fBookedActions
Non-owning pointers to actions to be run.
std::vector< RDFInternal::RCallback > fCallbacks
Registered callbacks.
const ELoopType fLoopType
The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
void SetupSampleCallbacks(TTreeReader *r, unsigned int slot)
ColumnNames_t fValidBranchNames
Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
void CleanUpTask(TTreeReader *r, unsigned int slot)
Perform clean-up operations. To be called at the end of each task execution.
const std::map< std::string, std::vector< void * > > & GetDSValuePtrs() const
std::map< std::string, std::vector< void * > > fDSValuePtrMap
Registry of per-slot value pointers for booked data-source columns.
void SetTree(const std::shared_ptr< TTree > &tree)
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
std::vector< RDFInternal::RVariationBase * > fBookedVariations
std::vector< RNodeBase * > GetGraphEdges() const
Return all graph edges known to RLoopManager. This includes Filters and Ranges but not Defines.
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void RunDataSourceMT()
Run event loop over data accessed through a DataSource, in parallel.
void PartialReport(ROOT::RDF::RCutFlowReport &) const final
End of recursive chain of calls, does nothing.
bool HasDSValuePtrs(const std::string &col) const
std::vector< std::string > GetFiltersNames()
For each booked filter, returns either the name or "Unnamed Filter".
RLoopManager(const RLoopManager &)=delete
const std::unique_ptr< RDataSource > fDataSource
Owning pointer to a data-source object. Null if no data-source.
RDFInternal::RNewSampleNotifier fNewSampleNotifier
const ColumnNames_t fDefaultColumns
void Book(RDFInternal::RActionBase *actionPtr)
void InitNodeSlots(TTreeReader *r, unsigned int slot)
Build TTreeReaderValues for all nodes. This method loops over all filters, actions and other booked ob...
std::vector< RDFInternal::ROneTimeCallback > fCallbacksOnce
Registered callbacks to invoke just once before running the loop.
void RegisterCallback(ULong64_t everyNEvents, std::function< void(unsigned int)> &&f)
void RunDataSource()
Run event loop over data accessed through a DataSource, in sequence.
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
void RunTreeProcessorMT()
Run event loop over one or multiple ROOT files, in parallel.
std::shared_ptr< ROOT::Internal::RDF::GraphDrawing::GraphNode > GetGraph(std::unordered_map< void *, std::shared_ptr< ROOT::Internal::RDF::GraphDrawing::GraphNode > > &visitedMap)
void Deregister(RDFInternal::RActionBase *actionPtr)
void InitNodes()
Initialize all nodes of the functional graph before running the event loop.
RLoopManager * GetLoopManagerUnchecked() final
Base class for non-leaf nodes of the computational graph.
Definition: RNodeBase.hxx:43
unsigned int fNStopsReceived
Number of times that a child node signaled to stop processing entries.
Definition: RNodeBase.hxx:47
unsigned int fNChildren
Number of nodes of the functional graph hanging from this object.
Definition: RNodeBase.hxx:46
Helper class that provides the operation graph nodes.
RCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
std::vector< ULong64_t > fCounters
void operator()(unsigned int slot)
void operator()(unsigned int slot)
ROneTimeCallback(Callback_t &&f, unsigned int nSlots)
This type includes all parts of RVariation that do not depend on the callable signature.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Describe directory structure in memory.
Definition: TDirectory.h:45
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:44
A TTree represents a columnar dataset.
Definition: TTree.h:79
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branch names, including the ones of the friend trees.
std::function< void(unsigned int)> Callback_t
void(off) SmallVectorTemplateBase< T
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with a RDataFrame computation graph via e....
Definition: RSampleInfo.hxx:84
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:167
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Definition: tree.py:1
A RAII object that calls RLoopManager::CleanUpTask at destruction.