Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterfaceBase.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud CERN 08/2022
2
3/*************************************************************************
4 * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_RINTERFACEBASE
12#define ROOT_RDF_RINTERFACEBASE
13
16#include <ROOT/RDF/RDisplay.hxx>
18#include <ROOT/RDataSource.hxx>
19#include <ROOT/RResultPtr.hxx>
20#include <ROOT/RStringView.hxx>
21#include <TError.h> // R__ASSERT
22
23#include <memory>
24#include <set>
25#include <string>
26#include <vector>
27
28namespace ROOT {
29namespace RDF {
30
31class RDFDescription;
32class RVariationsDescription;
33
34using ColumnNames_t = std::vector<std::string>;
35
38
39// clang-format off
40/**
41 * \class ROOT::RDF::RInterfaceBase
42 * \ingroup dataframe
43 * \brief The public interface to the RDataFrame federation of classes.
44 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
45 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
46 *
47 * The documentation of each method features a one-liner illustrating how to use the method, for example showing how
48 * the majority of the template parameters are automatically deduced, requiring little or no effort by the user.
49 */
50// clang-format on
52protected:
53 /// The RLoopManager at the root of this computation graph. Never null.
55 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
57
58 /// Contains the columns defined up to this node.
60
61 std::string DescribeDataset() const;
62
64
65 void CheckIMTDisabled(std::string_view callerName);
66
67 void AddDefaultColumns();
68
69 template <typename RetType>
70 void SanityChecksForVary(const std::vector<std::string> &colNames, const std::vector<std::string> &variationTags,
71 std::string_view variationName)
72 {
73 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
74 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
75 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
76
77 for (auto &colName : colNames) {
80 }
81 RDFInternal::CheckValidCppVarName(variationName, "Vary");
82
83 static_assert(RDFInternal::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
84
85 if (colNames.size() > 1) { // we are varying multiple columns simultaneously, RetType is RVec<RVec<T>>
87 if (!hasInnerRVec)
88 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
89 "does not return an RVec of RVecs.");
90
91 auto colTypes = GetColumnTypeNamesList(colNames);
92 auto allColTypesEqual =
93 std::all_of(colTypes.begin() + 1, colTypes.end(), [&](const std::string &t) { return t == colTypes[0]; });
94 if (!allColTypesEqual)
95 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
96
97 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
98
99 for (auto i = 0u; i < colTypes.size(); ++i) {
100 const auto *define = fColRegister.GetDefine(colNames[i]);
101 const auto &expectedTypeID = define ? define->GetTypeId() : RDFInternal::TypeName2TypeID(colTypes[i]);
102 if (innerTypeID != expectedTypeID)
103 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
104 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
105 colTypes[i] + ").");
106 }
107 } else { // we are varying a single column, RetType is RVec<T>
108 const auto &retTypeID = typeid(typename RetType::value_type);
109 const auto &colName = colNames[0]; // we have only one element in there
110 const auto *define = fColRegister.GetDefine(colName);
111 const auto &expectedTypeID =
112 define ? define->GetTypeId() : RDFInternal::TypeName2TypeID(GetColumnType(colName));
113 if (retTypeID != expectedTypeID)
114 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
115 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
116 GetColumnType(colName) + ").");
117 }
118
119 // when varying multiple columns, they must be different columns
120 if (colNames.size() > 1) {
121 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
122 if (uniqueCols.size() != colNames.size())
123 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
124 }
125 }
126
128
129 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
130 {
132 }
133
134 template <typename... ColumnTypes>
136 {
137 if (fDataSource != nullptr)
138 RDFInternal::AddDSColumns(validCols, *fLoopManager, *fDataSource, typeList, fColRegister);
139 }
140
141 /// Create RAction object, return RResultPtr for the action
142 /// Overload for the case in which all column types were specified (no jitting).
143 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
144 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
145 /// for which the constructor arguments of the action helper are different from the returned value.
146 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
147 typename HelperArgType = ActionResultType,
148 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
149 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
150 const std::shared_ptr<HelperArgType> &helperArg,
151 const std::shared_ptr<RDFNode> &proxiedPtr, const int /*nColumns*/ = -1)
152 {
153 constexpr auto nColumns = sizeof...(ColTypes);
154
155 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
157
158 const auto nSlots = fLoopManager->GetNSlots();
159
160 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, proxiedPtr, ActionTag{},
162 return MakeResultPtr(r, *fLoopManager, std::move(action));
163 }
164
165 /// Create RAction object, return RResultPtr for the action.
166 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
167 /// This overload has a `nColumns` optional argument. If present (i.e. greater than -1), the number of required
168 /// columns for this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
 /// \param[in] columns Column names requested by the caller; they are passed through GetValidatedColumnNames.
 /// \param[in] r Shared pointer to the action result, handed to MakeResultPtr.
 /// \param[in] helperArg Shared pointer passed to RDFInternal::MakeSharedOnHeap; the result is forwarded to JitBuildAction.
 /// \param[in] proxiedPtr The node this action is attached to; it is upcast and its variations are queried.
 /// \param[in] nColumns Number of columns the action requires, or -1 to use sizeof...(ColTypes).
 /// \return The RResultPtr produced by MakeResultPtr from `r`, the loop manager and the new RJittedAction.
169 template <typename ActionTag, typename... ColTypes, typename ActionResultType, typename RDFNode,
170 typename HelperArgType = ActionResultType,
171 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
172 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
173 const std::shared_ptr<HelperArgType> &helperArg,
174 const std::shared_ptr<RDFNode> &proxiedPtr, const int nColumns = -1)
175 {
 // An explicit nColumns (anything greater than -1) takes precedence over the size of the ColTypes pack.
176 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
177
178 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
179 const unsigned int nSlots = fLoopManager->GetNSlots();
180
181 auto *tree = fLoopManager->GetTree();
 // helperArg and the upcast node are handed to the jitted code as raw pointers obtained from
 // RDFInternal::MakeSharedOnHeap; presumably the jitted code releases them -- TODO confirm.
182 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
183
184 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(proxiedPtr));
185
 // The RJittedAction is created immediately; only a weak handle to it (MakeWeakOnHeap) is given to JitBuildAction.
186 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager, validColumnNames,
187 fColRegister, proxiedPtr->GetVariations());
188 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
189
 // JitBuildAction returns a std::string, which is handed to the loop manager through ToJitExec.
190 auto toJit =
191 RDFInternal::JitBuildAction(validColumnNames, upcastNodeOnHeap, typeid(HelperArgType), typeid(ActionTag),
192 helperArgOnHeap, tree, nSlots, fColRegister, fDataSource, jittedActionOnHeap);
193 fLoopManager->ToJitExec(toJit);
 // NOTE(review): jittedAction is declared const, so std::move yields a const rvalue and the shared_ptr is
 // copied rather than moved; dropping the const above would make this a real move.
194 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
195 }
196
197public:
198 RInterfaceBase(std::shared_ptr<RDFDetail::RLoopManager> lm);
200
202
203 std::string GetColumnType(std::string_view column);
204
206
208 bool HasColumn(std::string_view columnName);
210 unsigned int GetNSlots() const;
211 unsigned int GetNRuns() const;
212};
213} // namespace RDF
214} // namespace ROOT
215
216#endif
#define R__ASSERT(e)
Definition TError.h:117
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
virtual const std::type_info & GetTypeId() const =0
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
A binder for user-defined columns, variations and aliases.
RDFDetail::RDefineBase * GetDefine(const std::string &colName) const
Return the RDefine for the requested column name, or nullptr.
A DFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RDFDescription Describe()
Return information about the dataframe.
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
RDFDetail::RLoopManager * fLoopManager
The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which all column typ...
unsigned int GetNRuns() const
Gets the number of event loops run.
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
void SanityChecksForVary(const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNSlots() const
Gets the number of data processing slots.
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
std::string DescribeDataset() const
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int nColumns=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which one or more co...
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFDetail::RLoopManager * GetLoopManager() const
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
Smart pointer for the return type of actions.
A descriptor for the systematic variations known to a given RDataFrame node.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:51
void CheckValidCppVarName(std::string_view var, const std::string &where)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:99
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
std::vector< std::string > ColumnNames_t
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
Definition tree.py:1
Lightweight storage for a collection of types.