Loading [MathJax]/extensions/tex2jax.js
Logo ROOT  
Reference Guide
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
RLoopManager.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RLOOPMANAGER
12#define ROOT_RLOOPMANAGER
13
16
17#include <functional>
18#include <map>
19#include <memory>
20#include <string>
21#include <vector>
22
23// forward declarations
24class TTreeReader;
25
26namespace ROOT {
27namespace RDF {
28class RCutFlowReport;
29class RDataSource;
30} // ns RDF
31
32namespace Internal {
33namespace RDF {
34ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates = true);
35
36class RActionBase;
37class GraphNode;
38
39namespace GraphDrawing {
41} // ns GraphDrawing
42} // ns RDF
43} // ns Internal
44
45namespace Detail {
46namespace RDF {
47using namespace ROOT::TypeTraits;
49
50class RFilterBase;
51class RRangeBase;
53
54/// The head node of a RDF computation graph.
55/// This class is responsible for running the event loop.
56class RLoopManager : public RNodeBase {
// NOTE(review): the source line numbers embedded in this listing are not contiguous (e.g. 59 -> 62, 80 -> 83,
// 106 -> 108, 159 -> 161), so a few declarations used below (such as fFun and fEveryN) are not shown here.
57 enum class ELoopType { kROOTFiles, kROOTFilesMT, kNoFiles, kNoFilesMT, kDataSource, kDataSourceMT };
58 using Callback_t = std::function<void(unsigned int)>;
// Callable wrapper that forwards to fFun once every fEveryN invocations, counted separately for each slot.
59 class TCallback {
// One invocation counter per slot, all starting at zero (see the constructor).
62 std::vector<ULong64_t> fCounters;
63
64 public:
// Stores the callback (moved from `f`) and the period `everyN`, and creates `nSlots` zeroed counters.
65 TCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
66 : fFun(std::move(f)), fEveryN(everyN), fCounters(nSlots, 0ull)
67 {
68 }
69
// Increments the counter of `slot`; when it reaches fEveryN the counter is reset and fFun(slot) is called.
// `slot` is used as an index into fCounters without a bounds check.
70 void operator()(unsigned int slot)
71 {
72 auto &c = fCounters[slot];
73 ++c;
74 if (c == fEveryN) {
// Reset before invoking so the next period starts counting from zero.
75 c = 0ull;
76 fFun(slot);
77 }
78 }
79 };
80
// The members below belong to the one-time callback helper (its constructor is named TOneTimeCallback);
// the class header itself is not shown in this listing.
// One flag per slot: 0 until fFun has been invoked for that slot, 1 afterwards.
83 std::vector<int> fHasBeenCalled; // std::vector<bool> is thread-unsafe for our purposes (and generally evil)
84
85 public:
// Stores the callback (moved from `f`) and creates `nSlots` flags, all initialised to 0.
86 TOneTimeCallback(Callback_t &&f, unsigned int nSlots) : fFun(std::move(f)), fHasBeenCalled(nSlots, 0) {}
87
// Invokes fFun(slot) only the first time it is called for a given slot; later calls return immediately.
88 void operator()(unsigned int slot)
89 {
90 if (fHasBeenCalled[slot] == 1)
91 return;
92 fFun(slot);
// The flag is set only after fFun returns.
93 fHasBeenCalled[slot] = 1;
94 }
95 };
96
97 std::vector<RDFInternal::RActionBase *> fBookedActions; ///< Non-owning pointers to actions to be run
98 std::vector<RDFInternal::RActionBase *> fRunActions; ///< Non-owning pointers to actions already run
99 std::vector<RFilterBase *> fBookedFilters;
100 std::vector<RFilterBase *> fBookedNamedFilters; ///< Contains a subset of fBookedFilters, i.e. only the named filters
101 std::vector<RRangeBase *> fBookedRanges;
102
103 /// Shared pointer to the input TTree. It does not delete the pointee if the TTree/TChain was passed directly as an
104 /// argument to RDataFrame's ctor (in which case we let users retain ownership).
105 std::shared_ptr<TTree> fTree{nullptr};
106 const ColumnNames_t fDefaultColumns;
108 const unsigned int fNSlots{1};
110 const ELoopType fLoopType; ///< The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
111 const std::unique_ptr<RDataSource> fDataSource; ///< Owning pointer to a data-source object. Null if no data-source
112 std::map<std::string, std::string> fAliasColumnNameMap; ///< ColumnNameAlias-columnName pairs
113 std::vector<TCallback> fCallbacks; ///< Registered callbacks
114 std::vector<TOneTimeCallback> fCallbacksOnce; ///< Registered callbacks to invoke just once before running the loop
115 unsigned int fNRuns{0}; ///< Number of event loops run
116
117 /// Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
118 ColumnNames_t fValidBranchNames;
119
120 void CheckIndexedFriends();
// Event-loop drivers: one sequential and one parallel (MT) variant for each kind of input
// (no source files, ROOT files, data source).
121 void RunEmptySourceMT();
122 void RunEmptySource();
123 void RunTreeProcessorMT();
124 void RunTreeReader();
125 void RunDataSourceMT();
126 void RunDataSource();
// Execute actions and make sure named filters are called for each event.
127 void RunAndCheckFilters(unsigned int slot, Long64_t entry);
128 void InitNodeSlots(TTreeReader *r, unsigned int slot);
// Initialize all nodes of the functional graph before running the event loop.
129 void InitNodes();
// Clean-up to be performed at the end of each event loop.
130 void CleanUpNodes();
// Clean-up to be performed at the end of each task execution.
131 void CleanUpTask(unsigned int slot);
// Trigger counting of the number of child nodes for each node of the functional graph.
132 void EvalChildrenCounts();
133
134public:
135 RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches);
136 RLoopManager(ULong64_t nEmptyEntries);
137 RLoopManager(std::unique_ptr<RDataSource> ds, const ColumnNames_t &defaultBranches);
// Copy construction is disabled.
138 RLoopManager(const RLoopManager &) = delete;
140
// Add RDF nodes that require just-in-time compilation to the computation graph.
142 void Jit();
// This object is itself the loop manager, so it returns `this`.
143 RLoopManager *GetLoopManagerUnchecked() final { return this; }
// Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
144 void Run();
// Return the list of default columns; empty if none was provided when constructing the RDataFrame.
145 const ColumnNames_t &GetDefaultColumnNames() const;
146 TTree *GetTree() const;
// Non-owning access to the data source; returns a null pointer when fDataSource is empty.
149 RDataSource *GetDataSource() const { return fDataSource.get(); }
150 void Book(RDFInternal::RActionBase *actionPtr);
151 void Deregister(RDFInternal::RActionBase *actionPtr);
152 void Book(RFilterBase *filterPtr);
153 void Deregister(RFilterBase *filterPtr);
154 void Book(RRangeBase *rangePtr);
155 void Deregister(RRangeBase *rangePtr);
156 bool CheckFilters(unsigned int, Long64_t) final;
157 unsigned int GetNSlots() const { return fNSlots; }
// Call FillReport on all booked filters.
158 void Report(ROOT::RDF::RCutFlowReport &rep) const final;
159 /// End of recursive chain of calls, does nothing
// NOTE(review): source line 160 is not shown in this listing; the comment above does not describe SetTree,
// which replaces the stored tree pointer.
161 void SetTree(const std::shared_ptr<TTree> &tree) { fTree = tree; }
162 void IncrChildrenCount() final { ++fNChildren; }
// Only increments fNStopsReceived; nothing else happens here.
163 void StopProcessing() final { ++fNStopsReceived; }
164 void ToJitExec(const std::string &) const;
// Inserts the alias, or overwrites the column name of an already registered alias.
165 void AddColumnAlias(const std::string &alias, const std::string &colName) { fAliasColumnNameMap[alias] = colName; }
166 const std::map<std::string, std::string> &GetAliasMap() const { return fAliasColumnNameMap; }
167 void RegisterCallback(ULong64_t everyNEvents, std::function<void(unsigned int)> &&f);
168 unsigned int GetNRuns() const { return fNRuns; }
169
170 /// End of recursive chain of calls, does nothing
171 void AddFilterName(std::vector<std::string> &) {}
172 /// For each booked filter, returns either the name or "Unnamed Filter"
173 std::vector<std::string> GetFiltersNames();
174
175 /// For all the actions, either booked or run
176 std::vector<RDFInternal::RActionBase *> GetAllActions();
177
// Returns a copy of the vector of booked action pointers (the pointers themselves are non-owning).
178 std::vector<RDFInternal::RActionBase *> GetBookedActions() { return fBookedActions; }
179 std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode> GetGraph();
180
// Return all valid TTree::Branch names (caching results for subsequent calls).
181 const ColumnNames_t &GetBranchNames();
182};
183
184} // ns RDF
185} // ns Detail
186} // ns ROOT
187
188#endif
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
long long Long64_t
Definition: RtypesCore.h:71
unsigned long long ULong64_t
Definition: RtypesCore.h:72
typedef void((*Func_t)())
TCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
TOneTimeCallback(Callback_t &&f, unsigned int nSlots)
The head node of a RDF computation graph.
RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches)
unsigned int fNRuns
Number of event loops run.
bool CheckFilters(unsigned int, Long64_t) final
void EvalChildrenCounts()
Trigger counting of number of children nodes for each node of the functional graph.
void CleanUpNodes()
Perform clean-up operations. To be called at the end of each event loop.
void RunEmptySource()
Run event loop with no source files, in sequence.
const std::map< std::string, std::string > & GetAliasMap() const
std::function< void(unsigned int)> Callback_t
void Report(ROOT::RDF::RCutFlowReport &rep) const final
Call FillReport on all booked filters.
std::vector< RFilterBase * > fBookedNamedFilters
Contains a subset of fBookedFilters, i.e. only the named filters.
void RunEmptySourceMT()
Run event loop with no source files, in parallel.
ULong64_t GetNEmptyEntries() const
RLoopManager & operator=(const RLoopManager &)=delete
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
unsigned int GetNRuns() const
::TDirectory * GetDirectory() const
std::vector< RDFInternal::RActionBase * > GetBookedActions()
std::shared_ptr< TTree > fTree
Shared pointer to the input TTree.
void RunTreeReader()
Run event loop over one or multiple ROOT files, in sequence.
std::vector< RDFInternal::RActionBase * > GetAllActions()
For all the actions, either booked or run.
void CleanUpTask(unsigned int slot)
Perform clean-up operations. To be called at the end of each task execution.
std::vector< RDFInternal::RActionBase * > fRunActions
Non-owning pointers to actions already run.
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void AddFilterName(std::vector< std::string > &)
End of recursive chain of calls, does nothing.
std::vector< RRangeBase * > fBookedRanges
std::map< std::string, std::string > fAliasColumnNameMap
ColumnNameAlias-columnName pairs.
std::vector< TCallback > fCallbacks
Registered callbacks.
void RunAndCheckFilters(unsigned int slot, Long64_t entry)
Execute actions and make sure named filters are called for each event.
std::vector< RFilterBase * > fBookedFilters
std::vector< RDFInternal::RActionBase * > fBookedActions
Non-owning pointers to actions to be run.
std::shared_ptr< ROOT::Internal::RDF::GraphDrawing::GraphNode > GetGraph()
const ELoopType fLoopType
The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
void AddColumnAlias(const std::string &alias, const std::string &colName)
ColumnNames_t fValidBranchNames
Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
void SetTree(const std::shared_ptr< TTree > &tree)
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
std::vector< TOneTimeCallback > fCallbacksOnce
Registered callbacks to invoke just once before running the loop.
void RunDataSourceMT()
Run event loop over data accessed through a DataSource, in parallel.
void PartialReport(ROOT::RDF::RCutFlowReport &) const final
End of recursive chain of calls, does nothing.
std::vector< std::string > GetFiltersNames()
For each booked filter, returns either the name or "Unnamed Filter".
RLoopManager(const RLoopManager &)=delete
const std::unique_ptr< RDataSource > fDataSource
Owning pointer to a data-source object. Null if no data-source.
const ColumnNames_t fDefaultColumns
void Book(RDFInternal::RActionBase *actionPtr)
void InitNodeSlots(TTreeReader *r, unsigned int slot)
Build TTreeReaderValues for all nodes. This method loops over all filters, actions and other booked ob...
void RegisterCallback(ULong64_t everyNEvents, std::function< void(unsigned int)> &&f)
void RunDataSource()
Run event loop over data accessed through a DataSource, in sequence.
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
void RunTreeProcessorMT()
Run event loop over one or multiple ROOT files, in parallel.
void Deregister(RDFInternal::RActionBase *actionPtr)
void InitNodes()
Initialize all nodes of the functional graph before running the event loop.
RLoopManager * GetLoopManagerUnchecked() final
Base class for non-leaf nodes of the computational graph.
Definition: RNodeBase.hxx:41
unsigned int fNStopsReceived
Number of times that a child node signaled to stop processing entries.
Definition: RNodeBase.hxx:45
unsigned int fNChildren
Number of nodes of the functional graph hanging from this object.
Definition: RNodeBase.hxx:44
Helper class that provides the operation graph nodes.
Class used to create the operation graph to be printed in the dot representation.
Definition: GraphNode.hxx:26
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Describe directory structure in memory.
Definition: TDirectory.h:40
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:43
A TTree represents a columnar dataset.
Definition: TTree.h:78
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: StringConv.hxx:21
Definition: tree.py:1