Logo ROOT  
Reference Guide
RDFUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include "RConfigure.h" // R__USE_IMT
12#include "ROOT/RDataSource.hxx"
15#include "RtypesCore.h"
16#include "TBranch.h"
17#include "TBranchElement.h"
18#include "TClass.h"
19#include "TClassEdit.h"
20#include "TClassRef.h"
21#include "TInterpreter.h"
22#include "TLeaf.h"
23#include "TROOT.h" // IsImplicitMTEnabled, GetThreadPoolSize
24#include "TTree.h"
25
26#include <stdexcept>
27#include <string>
28#include <cstring>
29#include <typeinfo>
30
31using namespace ROOT::Detail::RDF;
32using namespace ROOT::RDF;
33
34namespace ROOT {
35namespace Internal {
36namespace RDF {
37
38/// Return the type_info associated to a name. If the association fails, an
39/// exception is thrown.
40/// References and pointers are not supported since those cannot be stored in
41/// columns.
42const std::type_info &TypeName2TypeID(const std::string &name)
43{
44 if (auto c = TClass::GetClass(name.c_str())) {
45 return *c->GetTypeInfo();
46 } else if (name == "char" || name == "Char_t")
47 return typeid(char);
48 else if (name == "unsigned char" || name == "UChar_t")
49 return typeid(unsigned char);
50 else if (name == "int" || name == "Int_t")
51 return typeid(int);
52 else if (name == "unsigned int" || name == "UInt_t")
53 return typeid(unsigned int);
54 else if (name == "short" || name == "Short_t")
55 return typeid(short);
56 else if (name == "unsigned short" || name == "UShort_t")
57 return typeid(unsigned short);
58 else if (name == "long" || name == "Long_t")
59 return typeid(long);
60 else if (name == "unsigned long" || name == "ULong_t")
61 return typeid(unsigned long);
62 else if (name == "double" || name == "Double_t")
63 return typeid(double);
64 else if (name == "float" || name == "Float_t")
65 return typeid(float);
66 else if (name == "long long" || name == "long long int" || name == "Long64_t")
67 return typeid(Long64_t);
68 else if (name == "unsigned long long" || name == "unsigned long long int" || name == "ULong64_t")
69 return typeid(ULong64_t);
70 else if (name == "bool" || name == "Bool_t")
71 return typeid(bool);
72 else {
73 std::string msg("Cannot extract type_info of type ");
74 msg += name.c_str();
75 msg += ".";
76 throw std::runtime_error(msg);
77 }
78}
79
80/// Returns the name of a type starting from its type_info
81/// An empty string is returned in case of failure
82/// References and pointers are not supported since those cannot be stored in
83/// columns.
84std::string TypeID2TypeName(const std::type_info &id)
85{
86 if (auto c = TClass::GetClass(id)) {
87 return c->GetName();
88 } else if (id == typeid(char))
89 return "char";
90 else if (id == typeid(unsigned char))
91 return "unsigned char";
92 else if (id == typeid(int))
93 return "int";
94 else if (id == typeid(unsigned int))
95 return "unsigned int";
96 else if (id == typeid(short))
97 return "short";
98 else if (id == typeid(unsigned short))
99 return "unsigned short";
100 else if (id == typeid(long))
101 return "long";
102 else if (id == typeid(unsigned long))
103 return "unsigned long";
104 else if (id == typeid(double))
105 return "double";
106 else if (id == typeid(float))
107 return "float";
108 else if (id == typeid(Long64_t))
109 return "Long64_t";
110 else if (id == typeid(ULong64_t))
111 return "ULong64_t";
112 else if (id == typeid(bool))
113 return "bool";
114 else
115 return "";
116}
117
/// Build the type name of an RVec holding elements of the given type.
///
/// \param[in] valueType Name of the element type, inserted verbatim between the angle brackets.
/// \return The string "ROOT::VecOps::RVec<valueType>".
std::string ComposeRVecTypeName(const std::string &valueType)
{
   std::string rvecTypeName("ROOT::VecOps::RVec<");
   rvecTypeName += valueType;
   rvecTypeName += ">";
   return rvecTypeName;
}
122
123std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
124{
125 std::string colType = leaf->GetTypeName();
126 if (colType.empty())
127 throw std::runtime_error("Could not deduce type of leaf " + colName);
128 if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() == 1) {
129 // this is a variable-sized array
130 colType = ComposeRVecTypeName(colType);
131 } else if (leaf->GetLeafCount() == nullptr && leaf->GetLenStatic() > 1) {
132 // this is a fixed-sized array (we do not differentiate between variable- and fixed-sized arrays)
133 colType = ComposeRVecTypeName(colType);
134 } else if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() > 1) {
135 // we do not know how to deal with this branch
136 throw std::runtime_error("TTree leaf " + colName +
137 " has both a leaf count and a static length. This is not supported.");
138 }
139
140 return colType;
141}
142
143/// Return the typename of object colName stored in t, if any. Return an empty string if colName is not in t.
144/// Supported cases:
145/// - leaves corresponding to single values, variable- and fixed-length arrays, with following syntax:
146/// - "leafname", as long as TTree::GetLeaf resolves it
147/// - "b1.b2...leafname", as long as TTree::GetLeaf("b1.b2....", "leafname") resolves it
148/// - TBranchElements, as long as TTree::GetBranch resolves their names
149std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
150{
151 // look for TLeaf either with GetLeaf(colName) or with GetLeaf(branchName, leafName) (splitting on last dot)
152 auto leaf = t.GetLeaf(colName.c_str());
153 if (!leaf) {
154 const auto dotPos = colName.find_last_of('.');
155 const auto hasDot = dotPos != std::string::npos;
156 if (hasDot) {
157 const auto branchName = colName.substr(0, dotPos);
158 const auto leafName = colName.substr(dotPos + 1);
159 leaf = t.GetLeaf(branchName.c_str(), leafName.c_str());
160
161 // FIXME GetLeaf("a.b") and GetLeaf("a", "b") might fail while GetBranch("a.b") might work, even if a leaf
162 // called "a.b" exists. If that's the case, however, we don't want branch->GetCurrentClass()->GetName() as the
163 // type, because GetCurrentClass() returns the type of the top-level branch.
164 // So as a last resort, let's check if we manage to get to the leaf from the TBranch.
165 // To be revised once the TLeaf part of ROOT-10942 is fixed (see the ticket for more context).
166 auto branch = t.GetBranch(colName.c_str());
167 if (branch) {
168 auto leaves = branch->GetListOfLeaves();
169 if (leaves->GetEntries() == 1 && branch->GetListOfBranches()->GetEntries() == 0 &&
170 static_cast<TLeaf *>(leaves->At(0))->GetFullName() == colName)
171 return GetLeafTypeName(static_cast<TLeaf *>(leaves->At(0)), colName);
172 }
173 }
174 }
175 if (leaf)
176 return GetLeafTypeName(leaf, colName);
177
178 // we could not find a leaf named colName, so we look for a TBranchElement
179 auto branch = t.GetBranch(colName.c_str());
180 if (branch) {
181 static const TClassRef tbranchelement("TBranchElement");
182 if (branch->InheritsFrom(tbranchelement)) {
183 auto be = static_cast<TBranchElement *>(branch);
184 if (auto currentClass = be->GetCurrentClass())
185 return currentClass->GetName();
186 else {
187 // Here we have a special case for getting right the type of data members
188 // of classes sorted in TClonesArrays: ROOT-9674
189 auto mother = be->GetMother();
190 if (mother && mother->InheritsFrom(tbranchelement) && mother != be) {
191 auto beMom = static_cast<TBranchElement *>(mother);
192 auto beMomClass = beMom->GetClass();
193 if (beMomClass && 0 == std::strcmp("TClonesArray", beMomClass->GetName()))
194 return be->GetTypeName();
195 }
196 return be->GetClassName();
197 }
198 }
199 }
200
201 // colName is not a leaf nor a TBranchElement
202 return std::string();
203}
204
205/// Return a string containing the type of the given branch. Works both with real TTree branches and with temporary
206/// column created by Define. Throws if type name deduction fails.
207/// Note that for fixed- or variable-sized c-style arrays the returned type name will be RVec<T>.
208/// vector2rvec specifies whether typename 'std::vector<T>' should be converted to 'RVec<T>' or returned as is
209/// customColID is only used if isCustomColumn is true, and must correspond to the custom column's unique identifier
210/// returned by its `GetID()` method.
211std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, RDataSource *ds,
212 RCustomColumnBase *customColumn, bool vector2rvec)
213{
214 std::string colType;
215
216 if (ds && ds->HasColumn(colName))
217 colType = ds->GetTypeName(colName);
218
219 if (colType.empty() && tree) {
220 colType = GetBranchOrLeafTypeName(*tree, colName);
221 if (vector2rvec && TClassEdit::IsSTLCont(colType) == ROOT::ESTLType::kSTLvector) {
222 std::vector<std::string> split;
223 int dummy;
224 TClassEdit::GetSplit(colType.c_str(), split, dummy);
225 auto &valueType = split[1];
226 colType = ComposeRVecTypeName(valueType);
227 }
228 }
229
230 if (colType.empty() && customColumn) {
231 colType = customColumn->GetTypeName();
232 }
233
234 if (colType.empty())
235 throw std::runtime_error("Column \"" + colName +
236 "\" is not in a dataset and is not a custom column been defined.");
237
238 return colType;
239}
240
/// Translate a type name (e.g. "Float_t") into the corresponding ROOT type code (e.g. 'F') -- see the TBranch
/// documentation for the meaning of the codes.
///
/// \param[in] b The type name; both ROOT typedefs and the listed C++ spellings are recognised.
/// \return The one-character type code, or a space ' ' if the name is not recognised.
char TypeName2ROOTTypeName(const std::string &b)
{
   struct TypeCode {
      const char *fTypeName;
      char fCode;
   };
   // Every spelling appears exactly once, so the lookup order does not affect the result.
   static const TypeCode typeCodes[] = {
      {"Char_t", 'B'},           {"char", 'B'},
      {"UChar_t", 'b'},          {"unsigned char", 'b'},
      {"Short_t", 'S'},          {"short", 'S'},          {"short int", 'S'},
      {"UShort_t", 's'},         {"unsigned short", 's'}, {"unsigned short int", 's'},
      {"Int_t", 'I'},            {"int", 'I'},
      {"UInt_t", 'i'},           {"unsigned", 'i'},       {"unsigned int", 'i'},
      {"Float_t", 'F'},          {"float", 'F'},
      {"Double_t", 'D'},         {"double", 'D'},
      {"Long64_t", 'L'},         {"long", 'L'},           {"long int", 'L'},
      {"ULong64_t", 'l'},        {"unsigned long", 'l'},  {"unsigned long int", 'l'},
      {"Bool_t", 'O'},           {"bool", 'O'},
   };

   for (const auto &entry : typeCodes) {
      if (b == entry.fTypeName)
         return entry.fCode;
   }
   return ' ';
}
269
/// Return the number of processing slots to use.
///
/// The result is 1 unless the build defines R__USE_IMT and ROOT's implicit multi-threading is enabled, in which
/// case it is the size of ROOT's thread pool.
///
/// \return The number of slots.
unsigned int GetNSlots()
{
   unsigned int nSlots = 1;
#ifdef R__USE_IMT
   // Only consult the thread pool when implicit multi-threading is actually on: otherwise a single slot must be
   // reported, whatever size the pool reports.
   if (ROOT::IsImplicitMTEnabled())
      nSlots = ROOT::GetThreadPoolSize();
#endif // R__USE_IMT
   return nSlots;
}
279
280/// Replace occurrences of '.' with '_' in each string passed as argument.
281/// An Info message is printed when this happens. Dots at the end of the string are not replaced.
282/// An exception is thrown in case the resulting set of strings would contain duplicates.
283std::vector<std::string> ReplaceDotWithUnderscore(const std::vector<std::string> &columnNames)
284{
285 auto newColNames = columnNames;
286 for (auto &col : newColNames) {
287 const auto dotPos = col.find('.');
288 if (dotPos != std::string::npos && dotPos != col.size() - 1 && dotPos != 0u) {
289 auto oldName = col;
290 std::replace(col.begin(), col.end(), '.', '_');
291 if (std::find(columnNames.begin(), columnNames.end(), col) != columnNames.end())
292 throw std::runtime_error("Column " + oldName + " would be written as " + col +
293 " but this column already exists. Please use Alias to select a new name for " +
294 oldName);
295 Info("Snapshot", "Column %s will be saved as %s", oldName.c_str(), col.c_str());
296 }
297 }
298
299 return newColNames;
300}
301
302void InterpreterDeclare(const std::string &code)
303{
304 if (!gInterpreter->Declare(code.c_str())) {
305 const auto msg =
306 "\nRDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of "
307 "the crash\n All RDF objects that have not run an event loop yet should be considered in an invalid state.\n";
308 throw std::runtime_error(msg);
309 }
310}
311
312Long64_t InterpreterCalc(const std::string &code, const std::string &context)
313{
315 auto res = gInterpreter->Calc(code.c_str(), &errorCode);
316 if (errorCode != TInterpreter::EErrorCode::kNoError) {
317 std::string msg = "\nAn error occurred during just-in-time compilation";
318 if (!context.empty())
319 msg += " in " + context;
320 msg += ". The lines above might indicate the cause of the crash\nAll RDF objects that have not run their event "
321 "loop yet should be considered in an invalid state.\n";
322 throw std::runtime_error(msg);
323 }
324 return res;
325}
326
327} // end NS RDF
328} // end NS Internal
329} // end NS ROOT
double
Definition: Converters.cxx:921
l unsigned short
Definition: Converters.cxx:862
long
Definition: Converters.cxx:858
#define b(i)
Definition: RSha256.hxx:100
#define c(i)
Definition: RSha256.hxx:101
static RooMathCoreReg dummy
long long Long64_t
Definition: RtypesCore.h:71
unsigned long long ULong64_t
Definition: RtypesCore.h:72
void Info(const char *location, const char *msgfmt,...)
char name[80]
Definition: TGX11.cxx:109
#define gInterpreter
Definition: TInterpreter.h:556
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view) const =0
Checks if the dataset has a certain column.
virtual std::string GetTypeName(std::string_view) const =0
Type of a column as a string, e.g.
A Branch for the case of an object.
virtual TClass * GetClass() const
TObjArray * GetListOfLeaves()
Definition: TBranch.h:245
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:28
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2948
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:49
virtual const char * GetTypeName() const
Definition: TLeaf.h:130
virtual TLeaf * GetLeafCount() const
If this leaf stores a variable-sized array or a multi-dimensional array whose last dimension has vari...
Definition: TLeaf.h:112
virtual TString GetFullName() const
Return the full name (including the parent's branch names) of the leaf.
Definition: TLeaf.cxx:202
virtual Int_t GetLenStatic() const
Return the fixed length of this leaf.
Definition: TLeaf.h:123
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
A TTree represents a columnar dataset.
Definition: TTree.h:78
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition: TTree.cxx:5209
virtual TLeaf * GetLeaf(const char *branchname, const char *leafname)
Return pointer to the 1st Leaf named name in any Branch of this Tree or any branch in the list of fri...
Definition: TTree.cxx:6098
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition: RDFUtils.cxx:283
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: RDFUtils.cxx:42
unsigned int GetNSlots()
Definition: RDFUtils.cxx:270
std::string ComposeRVecTypeName(const std::string &valueType)
Definition: RDFUtils.cxx:118
std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
Definition: RDFUtils.cxx:123
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, RDataSource *ds, RCustomColumnBase *customColumn, bool vector2rvec)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:211
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:243
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:84
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
Definition: RDFUtils.cxx:149
Long64_t InterpreterCalc(const std::string &code, const std::string &context)
Definition: RDFUtils.cxx:312
void InterpreterDeclare(const std::string &code)
Definition: RDFUtils.cxx:302
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: StringConv.hxx:21
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:557
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
Definition: TROOT.cxx:564
@ kSTLvector
Definition: ESTLType.h:30
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.
Definition: tree.py:1