Logo ROOT   6.21/01
Reference Guide
RLoopManager.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RLOOPMANAGER
12 #define ROOT_RLOOPMANAGER
13 
14 #include "ROOT/RDF/RNodeBase.hxx"
15 #include "ROOT/RDF/NodesUtils.hxx"
16 
17 #include <functional>
18 #include <map>
19 #include <memory>
20 #include <string>
21 #include <vector>
22 
23 // forward declarations
24 class TTreeReader;
25 
26 namespace ROOT {
27 namespace RDF {
28 class RCutFlowReport;
29 class RDataSource;
30 } // ns RDF
31 
32 namespace Internal {
33 namespace RDF {
/// Get the names of all branches of `t`, including those of its friend trees
/// (as stated by the reference summary for this function).
/// `allowDuplicates` defaults to true; its exact effect is not visible from this header.
34 ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates = true);
35 
// Forward declarations only: the definitions of these types live elsewhere.
36 class RActionBase;
37 class GraphNode;
38 
39 namespace GraphDrawing {
40 class GraphCreatorHelper;
41 } // ns GraphDrawing
42 } // ns RDF
43 } // ns Internal
44 
45 namespace Detail {
46 namespace RDF {
47 using namespace ROOT::TypeTraits;
49 
50 class RCustomColumnBase;
51 class RFilterBase;
52 class RRangeBase;
53 
54 /// The head node of an RDF computation graph.
55 /// This class is responsible for running the event loop.
///
/// NOTE(review): the line numbers embedded in this listing skip 57, 61-62, 82-83, 107, 125, 168 and 190,
/// so a few declarations that the code below relies on are not visible in this view.
56 class RLoopManager : public RNodeBase {
 /// The kinds of event loop this manager can run; every input kind has a matching `MT` variant.
58  enum class ELoopType { kROOTFiles, kROOTFilesMT, kNoFiles, kNoFilesMT, kDataSource, kDataSourceMT };
 /// Type of the user callbacks; the unsigned int argument is the slot passed to the wrappers below.
59  using Callback_t = std::function<void(unsigned int)>;
 /// Wraps a callback so that it is invoked once every `fEveryN` calls made with the same slot.
 /// One counter per slot is kept, so calls for different slots are counted independently.
 // NOTE(review): the fFun and fEveryN members initialised by the constructor are declared on listing
 // lines 61-62, which are missing from this view.
60  class TCallback {
63  std::vector<ULong64_t> fCounters;
64 
65  public:
  /// Store the callback and the period, and create `nSlots` counters, all starting at zero.
66  TCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
67  : fFun(std::move(f)), fEveryN(everyN), fCounters(nSlots, 0ull)
68  {
69  }
70 
  /// Count one call for `slot`; when the counter reaches `fEveryN`, reset it and invoke the callback.
  /// `slot` indexes fCounters without a bounds check.
71  void operator()(unsigned int slot)
72  {
73  auto &c = fCounters[slot];
   // The counter is incremented before the comparison, so the callback fires on the fEveryN-th call.
74  ++c;
75  if (c == fEveryN) {
76  c = 0ull;
77  fFun(slot);
78  }
79  }
80  };
81 
 // NOTE(review): listing lines 82-83 are missing here; presumably they open `class TOneTimeCallback {`
 // and declare its fFun member, since the constructor and operator() below belong to that class.
 /// Per-slot flags (0 or 1) recording whether the callback has already been invoked for that slot.
84  std::vector<int> fHasBeenCalled; // std::vector<bool> is thread-unsafe for our purposes (and generally evil)
85 
86  public:
  /// Store the callback and create `nSlots` flags, all cleared.
87  TOneTimeCallback(Callback_t &&f, unsigned int nSlots) : fFun(std::move(f)), fHasBeenCalled(nSlots, 0) {}
88 
  /// Invoke the callback for `slot` unless it has already been invoked for that slot.
  /// The flag is set only after the callback has returned.
89  void operator()(unsigned int slot)
90  {
91  if (fHasBeenCalled[slot] == 1)
92  return;
93  fFun(slot);
94  fHasBeenCalled[slot] = 1;
95  }
96  };
97 
98  std::vector<RDFInternal::RActionBase *> fBookedActions; ///< Non-owning pointers to actions to be run
99  std::vector<RDFInternal::RActionBase *> fRunActions; ///< Non-owning pointers to actions already run
100  std::vector<RFilterBase *> fBookedFilters;
101  std::vector<RFilterBase *> fBookedNamedFilters; ///< Contains a subset of fBookedFilters, i.e. only the named filters
102  std::vector<RRangeBase *> fBookedRanges;
103 
104  /// Shared pointer to the input TTree. It does not delete the pointee if the TTree/TChain was passed directly as an
105  /// argument to RDataFrame's ctor (in which case we let users retain ownership).
106  std::shared_ptr<TTree> fTree{nullptr};
 // NOTE(review): listing line 107 is missing here; the reference summary on this page lists a
 // `const ColumnNames_t fDefaultColumns` member, which is presumably what was declared on it.
108  const ULong64_t fNEmptyEntries{0};
109  const unsigned int fNSlots{1};
110  bool fMustRunNamedFilters{true};
111  const ELoopType fLoopType; ///< The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
112  std::string fToJitDeclare; ///< Code that should be just-in-time declared right before the event loop
113  std::string fToJitExec; ///< Code that should be just-in-time executed right before the event loop
114  const std::unique_ptr<RDataSource> fDataSource; ///< Owning pointer to a data-source object. Null if no data-source
115  std::map<std::string, std::string> fAliasColumnNameMap; ///< ColumnNameAlias-columnName pairs
116  std::vector<TCallback> fCallbacks; ///< Registered callbacks
117  std::vector<TOneTimeCallback> fCallbacksOnce; ///< Registered callbacks to invoke just once before running the loop
118  /// A unique ID that identifies the computation graph that starts with this RLoopManager.
119  /// Used, for example, to jit objects in a namespace reserved for this computation graph
120  const unsigned int fID = GetNextID();
121  unsigned int fNRuns{0}; ///< Number of event loops run
122 
123  std::vector<RCustomColumnBase *> fCustomColumns; ///< Non-owning container of all custom columns created so far.
124  /// Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
 // NOTE(review): the member documented by the comment above (listing line 125) is missing from this view;
 // the reference summary on this page names it `ColumnNames_t fValidBranchNames`.
126 
127  void CheckIndexedFriends();
128  void RunEmptySourceMT();
129  void RunEmptySource();
130  void RunTreeProcessorMT();
131  void RunTreeReader();
132  void RunDataSourceMT();
133  void RunDataSource();
134  void RunAndCheckFilters(unsigned int slot, Long64_t entry);
135  void InitNodeSlots(TTreeReader *r, unsigned int slot);
136  void InitNodes();
137  void CleanUpNodes();
138  void CleanUpTask(unsigned int slot);
139  void EvalChildrenCounts();
140  static unsigned int GetNextID();
141 
142 public:
143  RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches);
144  RLoopManager(ULong64_t nEmptyEntries);
145  RLoopManager(std::unique_ptr<RDataSource> ds, const ColumnNames_t &defaultBranches);
 // Copy construction and copy assignment are disabled.
146  RLoopManager(const RLoopManager &) = delete;
147  RLoopManager &operator=(const RLoopManager &) = delete;
148 
149  void JitDeclarations();
150  void Jit();
 /// Returns this object itself.
151  RLoopManager *GetLoopManagerUnchecked() final { return this; }
152  void Run();
153  const ColumnNames_t &GetDefaultColumnNames() const;
154  TTree *GetTree() const;
155  ::TDirectory *GetDirectory() const;
156  ULong64_t GetNEmptyEntries() const { return fNEmptyEntries; }
 /// Non-owning access to the data source; null when fDataSource holds no object.
157  RDataSource *GetDataSource() const { return fDataSource.get(); }
158  void Book(RDFInternal::RActionBase *actionPtr);
159  void Deregister(RDFInternal::RActionBase *actionPtr);
160  void Book(RFilterBase *filterPtr);
161  void Deregister(RFilterBase *filterPtr);
162  void Book(RRangeBase *rangePtr);
163  void Deregister(RRangeBase *rangePtr);
164  bool CheckFilters(unsigned int, Long64_t) final;
165  unsigned int GetNSlots() const { return fNSlots; }
166  void Report(ROOT::RDF::RCutFlowReport &rep) const final;
167  /// End of recursive chain of calls, does nothing
 // NOTE(review): the declaration documented by the comment above (listing line 168) is missing from this
 // view; presumably it is the PartialReport member listed in the reference summary on this page.
 // That comment does not describe SetTree below.
 /// Replace the input tree with `tree` (shares ownership through the shared_ptr).
169  void SetTree(const std::shared_ptr<TTree> &tree) { fTree = tree; }
 // fNChildren and fNStopsReceived are not declared in this class as shown; presumably inherited from RNodeBase.
170  void IncrChildrenCount() final { ++fNChildren; }
171  void StopProcessing() final { ++fNStopsReceived; }
 /// Append `s` to the code to be just-in-time declared.
172  void ToJitDeclare(const std::string &s) { fToJitDeclare.append(s); }
 /// Append `s` to the code to be just-in-time executed.
173  void ToJitExec(const std::string &s) { fToJitExec.append(s); }
 /// Map `alias` to `colName`; an existing mapping for the same alias is overwritten.
174  void AddColumnAlias(const std::string &alias, const std::string &colName) { fAliasColumnNameMap[alias] = colName; }
175  const std::map<std::string, std::string> &GetAliasMap() const { return fAliasColumnNameMap; }
176  void RegisterCallback(ULong64_t everyNEvents, std::function<void(unsigned int)> &&f);
177  unsigned int GetID() const { return fID; }
178  unsigned int GetNRuns() const { return fNRuns; }
179 
180  /// End of recursive chain of calls, does nothing
181  void AddFilterName(std::vector<std::string> &) {}
182  /// For each booked filter, returns either the name or "Unnamed Filter"
183  std::vector<std::string> GetFiltersNames();
184 
185  /// For all the actions, either booked or run
186  std::vector<RDFInternal::RActionBase *> GetAllActions();
187 
 /// Append `column` to fCustomColumns; no check for duplicates is made.
188  void RegisterCustomColumn(RCustomColumnBase *column) { fCustomColumns.push_back(column); }
189 
 // NOTE(review): the signature for the body below (listing line 190) is missing from this view; the
 // reference summary on this page gives it as `void DeRegisterCustomColumn(RCustomColumnBase *column)`.
 // The body removes every occurrence of `column` from fCustomColumns.
 // NOTE(review): std::remove is declared in <algorithm>, which is not among the includes visible in this
 // header; presumably it arrives transitively. Confirm, or include it explicitly.
191  {
192  fCustomColumns.erase(std::remove(fCustomColumns.begin(), fCustomColumns.end(), column), fCustomColumns.end());
193  }
194 
 /// Returns a copy of the vector of booked action pointers.
195  std::vector<RDFInternal::RActionBase *> GetBookedActions() { return fBookedActions; }
196  std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode> GetGraph();
197 
198  const ColumnNames_t &GetBranchNames();
199 };
200 
201 } // ns RDF
202 } // ns Detail
203 } // ns ROOT
204 
205 #endif
void AddFilterName(std::vector< std::string > &)
End of recursive chain of calls, does nothing.
The head node of a RDF computation graph.
long long Long64_t
Definition: RtypesCore.h:69
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree...
Definition: TTreeReader.h:43
Returns the available number of logical cores.
Definition: StringConv.hxx:21
std::map< std::string, std::string > fAliasColumnNameMap
ColumnNameAlias-columnName pairs.
#define f(i)
Definition: RSha256.hxx:104
const ColumnNames_t fDefaultColumns
std::string fToJitDeclare
Code that should be just-in-time declared right before the event loop.
void ToJitDeclare(const std::string &s)
STL namespace.
TCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
const ELoopType fLoopType
The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
unsigned int GetNRuns() const
Base class for non-leaf nodes of the computational graph.
Definition: RNodeBase.hxx:41
void ToJitExec(const std::string &s)
void PartialReport(ROOT::RDF::RCutFlowReport &) const final
End of recursive chain of calls, does nothing.
RLoopManager * GetLoopManagerUnchecked() final
unsigned int GetNSlots() const
ULong64_t GetNEmptyEntries() const
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151
static constexpr double s
std::vector< RFilterBase * > fBookedFilters
TOneTimeCallback(Callback_t &&f, unsigned int nSlots)
std::vector< RDFInternal::RActionBase * > fBookedActions
Non-owning pointers to actions to be run.
std::vector< RRangeBase * > fBookedRanges
ROOT::R::TRInterface & r
Definition: Object.C:4
std::vector< TCallback > fCallbacks
Registered callbacks.
void DeRegisterCustomColumn(RCustomColumnBase *column)
void AddColumnAlias(const std::string &alias, const std::string &colName)
ColumnNames_t fValidBranchNames
Cache of the tree/chain branch names. Never access directly, always use GetBranchNames().
std::vector< RDFInternal::RActionBase * > GetBookedActions()
std::vector< RDFInternal::RActionBase * > fRunActions
Non-owning pointers to actions already run.
Describe directory structure in memory.
Definition: TDirectory.h:34
unsigned long long ULong64_t
Definition: RtypesCore.h:70
std::vector< RFilterBase * > fBookedNamedFilters
Contains a subset of fBookedFilters, i.e. only the named filters.
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
std::vector< RCustomColumnBase * > fCustomColumns
Non-owning container of all custom columns created so far.
Binding & operator=(OUT(*fun)(void))
const std::unique_ptr< RDataSource > fDataSource
Owning pointer to a data-source object. Null if no data-source.
void SetTree(const std::shared_ptr< TTree > &tree)
RDataSource * GetDataSource() const
void RegisterCustomColumn(RCustomColumnBase *column)
#define c(i)
Definition: RSha256.hxx:101
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get the names of all branches, including those of the friend trees.
Definition: tree.py:1
A TTree represents a columnar dataset.
Definition: TTree.h:72
const std::map< std::string, std::string > & GetAliasMap() const
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
std::function< void(unsigned int)> Callback_t
ROOT::Detail::RDF::ColumnNames_t ColumnNames_t
Definition: RDataFrame.cxx:797
std::string fToJitExec
Code that should be just-in-time executed right before the event loop.
std::vector< TOneTimeCallback > fCallbacksOnce
Registered callbacks to invoke just once before running the loop.