Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RColumnRegister.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo, Massimo Tumolo CERN 06/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RCOLUMNREGISTER
12#define ROOT_RDF_RCOLUMNREGISTER
13
14#include <TString.h>
15
16#include <algorithm>
17#include <unordered_map>
18#include <memory>
19#include <string>
20#include <vector>
21
22namespace ROOT {
23namespace RDF {
24class RVariationsDescription;
25}
26namespace Detail {
27namespace RDF {
29class RDefineBase;
30class RLoopManager;
31}
32} // namespace Detail
33
34namespace Internal {
35namespace RDF {
36
38
39class RDefineReader;
40class RVariationBase;
41class RVariationReader;
42
43/// A helper type that keeps track of RDefine objects and their corresponding RDefineReaders.
46
47 // This is a shared_ptr only because we have to track its lifetime with a weak_ptr that we pass to jitted code
48 // (see BookDefineJit). It is never null.
49 std::shared_ptr<RDefineBase> fDefine;
50 // Column readers per variation (in the map) per slot (in the vector).
51 std::vector<std::unordered_map<std::string, std::unique_ptr<RDefineReader>>> fReadersPerVariation;
52
53public:
54 RDefinesWithReaders(std::shared_ptr<RDefineBase> define, unsigned int nSlots);
55 RDefineBase &GetDefine() const { return *fDefine; }
56 RDefineReader &GetReader(unsigned int slot, const std::string &variationName);
57};
58
60 // This is a shared_ptr only because we have to track its lifetime with a weak_ptr that we pass to jitted code
61 // (see BookVariationJit). It is never null.
62 std::shared_ptr<RVariationBase> fVariation;
63 // Column readers for this RVariation for a given variation (map key) and a given slot (vector element).
64 std::vector<std::unordered_map<std::string, std::unique_ptr<RVariationReader>>> fReadersPerVariation;
65
66public:
67 RVariationsWithReaders(std::shared_ptr<RVariationBase> variation, unsigned int nSlots);
69 RVariationReader &GetReader(unsigned int slot, const std::string &colName, const std::string &variationName);
70};
71
72/**
73 * \class ROOT::Internal::RDF::RColumnRegister
74 * \ingroup dataframe
75 * \brief A binder for user-defined columns, variations and aliases.
76 *
77 * The storage is copy-on-write and shared between all instances of the class that have the same values.
78 *
79 * Several components of an RDF computation graph make use of a column register. It keeps track of which columns have
80 * been defined, varied or aliased at each point of the computation graph.
81 * In many cases, the contents of the different column register instances are the same or only differ by a single
82 * extra defined/varied/aliased column. For this reason, in order to avoid unnecessary data duplication, fDefines,
83 * fAliases, fVariations and fColumnNames are all held through shared_ptrs that (whenever possible) are shared across
84 * RColumnRegister instances that are part of the same computation graph. If a new column, alias or variation is added
85 * between one node and the next, then the new node contains a new instance of a RColumnRegister that shares all data
86 * members with the previous instance except for the one data member that needed updating, which is replaced with a new
87 * updated instance.
88 */
90 using ColumnNames_t = std::vector<std::string>;
91 using DefinesMap_t = std::unordered_map<std::string, std::shared_ptr<RDefinesWithReaders>>;
92 /// See fVariations for more information on this type.
93 using VariationsMap_t = std::unordered_multimap<std::string, std::shared_ptr<RVariationsWithReaders>>;
94
95 std::shared_ptr<RDFDetail::RLoopManager> fLoopManager;
96
97 /// Immutable collection of Defines, can be shared among several nodes.
98 /// The pointee changes if a new Define node is added to the RColumnRegister.
99 std::shared_ptr<DefinesMap_t> fDefines;
100 /// Immutable map of Aliases, can be shared among several nodes.
101 std::shared_ptr<const std::unordered_map<std::string, std::string>> fAliases;
102 /// Immutable multimap of Variations, can be shared among several nodes.
103 /// The key is the name of an existing column, the values are all variations that affect that column.
104 /// Variations that affect multiple columns are inserted in the map multiple times, once per column,
105 /// and conversely each column (i.e. each key) can have several associated variations.
106 std::shared_ptr<VariationsMap_t> fVariations;
107 std::shared_ptr<const ColumnNames_t> fColumnNames; ///< Names of Defines and Aliases registered so far.
108
109 void AddName(std::string_view name);
110
111 RVariationsWithReaders *FindVariationAndReaders(const std::string &colName, const std::string &variationName);
112
113public:
117
118 explicit RColumnRegister(std::shared_ptr<RDFDetail::RLoopManager> lm);
120
121 ////////////////////////////////////////////////////////////////////////////
122 /// \brief Return the list of the names of the defined columns (Defines + Aliases).
124
126
127 RDFDetail::RDefineBase *GetDefine(const std::string &colName) const;
128
129 bool IsDefineOrAlias(std::string_view name) const;
130
131 void AddDefine(std::shared_ptr<RDFDetail::RDefineBase> column);
132
133 void AddAlias(std::string_view alias, std::string_view colName);
134
135 bool IsAlias(const std::string &name) const;
136
137 std::string ResolveAlias(std::string_view alias) const;
138
139 void AddVariation(std::shared_ptr<RVariationBase> variation);
140
141 std::vector<std::string> GetVariationsFor(const std::string &column) const;
142
143 std::vector<std::string> GetVariationDeps(const std::string &column) const;
144
145 std::vector<std::string> GetVariationDeps(const ColumnNames_t &columns) const;
146
148
149 RDFDetail::RColumnReaderBase *GetReader(unsigned int slot, const std::string &colName,
150 const std::string &variationName, const std::type_info &tid);
151};
152
153} // namespace RDF
154} // namespace Internal
155} // namespace ROOT
156
157#endif // ROOT_RDF_RCOLUMNREGISTER
char name[80]
Definition TGX11.cxx:110
Pure virtual base class for all column reader types.
A binder for user-defined columns, variations and aliases.
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
RColumnRegister & operator=(const RColumnRegister &)=default
ColumnNames_t BuildDefineNames() const
Return the list of the names of defined columns (no aliases).
std::shared_ptr< const std::unordered_map< std::string, std::string > > fAliases
Immutable map of Aliases, can be shared among several nodes.
RDFDetail::RDefineBase * GetDefine(const std::string &colName) const
Return the RDefine for the requested column name, or nullptr.
bool IsAlias(const std::string &name) const
Return true if the given column name is an existing alias.
std::vector< std::string > GetVariationsFor(const std::string &column) const
Get the names of the variations that directly provide alternative values for this column.
RDFDetail::RColumnReaderBase * GetReader(unsigned int slot, const std::string &colName, const std::string &variationName, const std::type_info &tid)
Return a RDefineReader or a RVariationReader, or nullptr if not available.
std::shared_ptr< const ColumnNames_t > fColumnNames
Names of Defines and Aliases registered so far.
std::string ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
std::unordered_multimap< std::string, std::shared_ptr< RVariationsWithReaders > > VariationsMap_t
See fVariations for more information on this type.
void AddVariation(std::shared_ptr< RVariationBase > variation)
Register a new systematic variation.
std::shared_ptr< VariationsMap_t > fVariations
Immutable multimap of Variations, can be shared among several nodes.
void AddDefine(std::shared_ptr< RDFDetail::RDefineBase > column)
Add a new defined column.
RColumnRegister(const RColumnRegister &)=default
RColumnRegister(RColumnRegister &&)=default
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
ROOT::RDF::RVariationsDescription BuildVariationsDescription() const
ColumnNames_t GetNames() const
Return the list of the names of the defined columns (Defines + Aliases).
std::shared_ptr< DefinesMap_t > fDefines
Immutable collection of Defines, can be shared among several nodes.
void AddAlias(std::string_view alias, std::string_view colName)
Add a new alias to the ledger.
std::shared_ptr< RDFDetail::RLoopManager > fLoopManager
std::vector< std::string > ColumnNames_t
RVariationsWithReaders * FindVariationAndReaders(const std::string &colName, const std::string &variationName)
Return the RVariationsWithReaders object that handles the specified variation of the specified column...
void AddName(std::string_view name)
Add a new name to the list returned by GetNames without booking a new column.
std::unordered_map< std::string, std::shared_ptr< RDefinesWithReaders > > DefinesMap_t
Column reader for defined columns.
A helper type that keeps track of RDefine objects and their corresponding RDefineReaders.
std::vector< std::unordered_map< std::string, std::unique_ptr< RDefineReader > > > fReadersPerVariation
RDefineReader & GetReader(unsigned int slot, const std::string &variationName)
std::shared_ptr< RDefineBase > fDefine
This type includes all parts of RVariation that do not depend on the callable signature.
Column reader that reads the value for a specific column, variation and slot.
std::vector< std::unordered_map< std::string, std::unique_ptr< RVariationReader > > > fReadersPerVariation
RVariationReader & GetReader(unsigned int slot, const std::string &colName, const std::string &variationName)
Return a column reader for the given slot, column and variation.
std::shared_ptr< RVariationBase > fVariation
A descriptor for the systematic variations known to a given RDataFrame node.
std::vector< std::string > ColumnNames_t
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...