Logo ROOT   6.12/07
Reference Guide
TDFUtils.cxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #include "RConfigure.h" // R__USE_IMT
12 #include "ROOT/TDFNodes.hxx" // ColumnName2ColumnTypeName -> TCustomColumnBase, FindUnknownColumns -> TLoopManager
13 #include "ROOT/TDFUtils.hxx"
14 #include "TBranch.h"
15 #include "TBranchElement.h"
16 #include "TClassRef.h"
17 #include "TFriendElement.h"
18 #include "TROOT.h" // IsImplicitMTEnabled, GetImplicitMTPoolSize
19 
20 #include <stdexcept>
21 #include <string>
22 class TTree;
23 using namespace ROOT::Detail::TDF;
24 using namespace ROOT::Experimental::TDF;
25 
26 namespace ROOT {
27 namespace Internal {
28 namespace TDF {
29 
30 TIgnoreErrorLevelRAII::TIgnoreErrorLevelRAII(int errorIgnoreLevel)
31 {
32  gErrorIgnoreLevel = errorIgnoreLevel;
33 }
34 TIgnoreErrorLevelRAII::~TIgnoreErrorLevelRAII()
35 {
36  gErrorIgnoreLevel = fCurIgnoreErrorLevel;
37 }
38 
39 /// Return the type_info associated to a name. If the association fails, an
40 /// exception is thrown.
41 /// References and pointers are not supported since those cannot be stored in
42 /// columns.
43 const std::type_info &TypeName2TypeID(const std::string &name)
44 {
45  if (auto c = TClass::GetClass(name.c_str())) {
46  return *c->GetTypeInfo();
47  } else if (name == "char" || name == "Char_t")
48  return typeid(char);
49  else if (name == "unsigned char" || name == "UChar_t")
50  return typeid(unsigned char);
51  else if (name == "int" || name == "Int_t")
52  return typeid(int);
53  else if (name == "unsigned int" || name == "UInt_t")
54  return typeid(unsigned int);
55  else if (name == "short" || name == "Short_t")
56  return typeid(short);
57  else if (name == "unsigned short" || name == "UShort_t")
58  return typeid(unsigned short);
59  else if (name == "long" || name == "Long_t")
60  return typeid(long);
61  else if (name == "unsigned long" || name == "ULong_t")
62  return typeid(unsigned long);
63  else if (name == "double" || name == "Double_t")
64  return typeid(double);
65  else if (name == "float" || name == "Float_t")
66  return typeid(float);
67  else if (name == "long long" || name == "long long int" || name == "Long64_t")
68  return typeid(Long64_t);
69  else if (name == "unsigned long long" || name == "unsigned long long int" || name == "ULong64_t")
70  return typeid(ULong64_t);
71  else if (name == "bool" || name == "Bool_t")
72  return typeid(bool);
73  else {
74  std::string msg("Cannot extract type_info of type ");
75  msg += name.c_str();
76  msg += ".";
77  throw std::runtime_error(msg);
78  }
79 }
80 
81 /// Returns the name of a type starting from its type_info
82 /// An empty string is returned in case of failure
83 /// References and pointers are not supported since those cannot be stored in
84 /// columns.
85 std::string TypeID2TypeName(const std::type_info &id)
86 {
87  if (auto c = TClass::GetClass(id)) {
88  return c->GetName();
89  } else if (id == typeid(char))
90  return "char";
91  else if (id == typeid(unsigned char))
92  return "unsigned char";
93  else if (id == typeid(int))
94  return "int";
95  else if (id == typeid(unsigned int))
96  return "unsigned int";
97  else if (id == typeid(short))
98  return "short";
99  else if (id == typeid(unsigned short))
100  return "unsigned short";
101  else if (id == typeid(long))
102  return "long";
103  else if (id == typeid(unsigned long))
104  return "unsigned long";
105  else if (id == typeid(double))
106  return "double";
107  else if (id == typeid(float))
108  return "float";
109  else if (id == typeid(Long64_t))
110  return "Long64_t";
111  else if (id == typeid(ULong64_t))
112  return "ULong64_t";
113  else if (id == typeid(bool))
114  return "bool";
115  else
116  return "";
117 
118 }
119 
120 /// Return a string containing the type of the given branch. Works both with real TTree branches and with temporary
121 /// column created by Define. Returns an empty string if type name deduction fails.
122 std::string
123 ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, TCustomColumnBase *tmpBranch, TDataSource *ds)
124 {
125  // if this is a TDataSource column, we just ask the type name to the data-source
126  if (ds && ds->HasColumn(colName)) {
127  return ds->GetTypeName(colName);
128  }
129 
130  TBranch *branch = nullptr;
131  if (tree)
132  branch = tree->GetBranch(colName.c_str());
133  if (branch) {
134  // this must be a real TTree branch
135  static const TClassRef tbranchelRef("TBranchElement");
136  if (branch->InheritsFrom(tbranchelRef)) {
137  // this branch is not a fundamental type, we can ask for the class name
138  return static_cast<TBranchElement *>(branch)->GetClassName();
139  } else {
140  // this branch must be a fundamental type or array thereof
141  const auto listOfLeaves = branch->GetListOfLeaves();
142  const auto nLeaves = listOfLeaves->GetEntries();
143  if (nLeaves != 1)
144  throw std::runtime_error("TTree branch " + colName + " has " + std::to_string(nLeaves) +
145  " leaves. Only one leaf per branch is supported.");
146  TLeaf *l = static_cast<TLeaf *>(listOfLeaves->UncheckedAt(0));
147  const std::string branchType = l->GetTypeName();
148  if (branchType.empty()) {
149  throw std::runtime_error("could not deduce type of branch " + std::string(colName));
150  } else if (l->GetLeafCount() != nullptr && l->GetLenStatic() == 1) {
151  // this is a variable-sized array
152  return "ROOT::Experimental::TDF::TArrayBranch<" + branchType + ">";
153  } else if (l->GetLeafCount() == nullptr && l->GetLenStatic() > 1) {
154  // this is a fixed-sized array (we do not differentiate between variable- and fixed-sized arrays)
155  return "ROOT::Experimental::TDF::TArrayBranch<" + branchType + ">";
156  } else if (l->GetLeafCount() == nullptr && l->GetLenStatic() == 1) {
157  // this branch contains a single fundamental type
158  return l->GetTypeName();
159  } else {
160  // we do not know how to deal with this branch
161  throw std::runtime_error("TTree branch " + colName +
162  " has both a leaf count and a static length. This is not supported.");
163  }
164  }
165  } else if (tmpBranch) {
166  // this must be a temporary branch
167  auto &id = tmpBranch->GetTypeId();
168  auto typeName = TypeID2TypeName(id);
169  if (typeName.empty()) {
170  std::string msg("Cannot deduce type of temporary column ");
171  msg += colName;
172  msg += ". The typename is ";
173  msg += id.name();
174  msg += ".";
175  throw std::runtime_error(msg);
176  }
177  return typeName;
178  } else {
179  throw std::runtime_error("Column \"" + colName + "\" is not in a file and has not been defined.");
180  }
181 }
182 
/// Map a ROOT type name (e.g. "Float_t") to the matching ROOT type code (e.g. 'F') -- see TBranch documentation.
/// \param[in] b The type name to convert.
/// \return The one-character type code, or a space ' ' if `b` is not one of the known names.
char TypeName2ROOTTypeName(const std::string &b)
{
   struct TTypeCode {
      const char *fTypeName;
      char fCode;
   };
   static const TTypeCode typeCodes[] = {{"Char_t", 'B'},   {"UChar_t", 'b'},   {"Short_t", 'S'}, {"UShort_t", 's'},
                                         {"Int_t", 'I'},    {"UInt_t", 'i'},    {"Float_t", 'F'}, {"Double_t", 'D'},
                                         {"Long64_t", 'L'}, {"ULong64_t", 'l'}, {"Bool_t", 'O'}};

   for (const auto &entry : typeCodes) {
      if (b == entry.fTypeName)
         return entry.fCode;
   }
   // no match
   return ' ';
}
211 
/// Overload for C strings: the pointer is handed back unchanged.
const char *ToConstCharPtr(const char *s)
{
   const char *const asCString = s;
   return asCString;
}

/// Overload for std::string: returns a pointer to the null-terminated character
/// buffer owned by `s`.
const char *ToConstCharPtr(const std::string &s)
{
   return s.data();
}
221 
/// Return the number of processing slots to use.
/// \return The size of the implicit multi-threading pool if ROOT was built with
/// R__USE_IMT and implicit multi-threading is currently enabled, 1 otherwise.
unsigned int GetNSlots()
{
   unsigned int nSlots = 1;
#ifdef R__USE_IMT
   // Only query the pool size when implicit multi-threading is actually on:
   // otherwise the pool size is not meaningful (it may be reported as 0) and
   // we must keep the single-slot default.
   if (ROOT::IsImplicitMTEnabled())
      nSlots = ROOT::GetImplicitMTPoolSize();
#endif // R__USE_IMT
   return nSlots;
}
231 
232 // The set here is used as a registry, the real list, which keeps the order, is
233 // the one in the vector
234 void GetBranchNamesImpl(TTree &t, std::set<std::string> &bNamesReg, ColumnNames_t &bNames,
235  std::set<TTree *> &analysedTrees)
236 {
237 
238  if (!analysedTrees.insert(&t).second) {
239  return;
240  }
241 
242  auto branches = t.GetListOfBranches();
243  if (branches) {
244  for (auto branchObj : *branches) {
245  auto name = branchObj->GetName();
246  if (bNamesReg.insert(name).second) {
247  bNames.emplace_back(name);
248  }
249  }
250  }
251 
252  auto friendTrees = t.GetListOfFriends();
253 
254  if (!friendTrees)
255  return;
256 
257  for (auto friendTreeObj : *friendTrees) {
258  auto friendTree = ((TFriendElement *)friendTreeObj)->GetTree();
259  GetBranchNamesImpl(*friendTree, bNamesReg, bNames, analysedTrees);
260  }
261 }
262 
263 ///////////////////////////////////////////////////////////////////////////////
264 /// Get all the branches names, including the ones of the friend trees
265 ColumnNames_t GetBranchNames(TTree &t)
266 {
267  std::set<std::string> bNamesSet;
268  ColumnNames_t bNames;
269  std::set<TTree *> analysedTrees;
270  GetBranchNamesImpl(t, bNamesSet, bNames, analysedTrees);
271  return bNames;
272 }
273 
274 void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols,
275  const ColumnNames_t &dataSourceColumns)
276 {
277  const std::string definedColStr(definedCol);
278  if (treePtr != nullptr) {
279  // check if definedCol is already present in TTree
280  const auto branch = treePtr->GetBranch(definedColStr.c_str());
281  if (branch != nullptr) {
282  const auto msg = "branch \"" + definedColStr + "\" already present in TTree";
283  throw std::runtime_error(msg);
284  }
285  }
286  // check if definedCol has already been `Define`d in the functional graph
287  if (std::find(customCols.begin(), customCols.end(), definedCol) != customCols.end()) {
288  const auto msg = "Redefinition of column \"" + definedColStr + "\"";
289  throw std::runtime_error(msg);
290  }
291  // check if definedCol is already present in the DataSource (but has not yet been `Define`d)
292  if (!dataSourceColumns.empty()) {
293  if (std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedCol) != dataSourceColumns.end()) {
294  const auto msg = "Redefinition of column \"" + definedColStr + "\" already present in the data-source";
295  throw std::runtime_error(msg);
296  }
297  }
298 }
299 
/// Verify that a snapshot was given as many template parameters as column names.
/// \param[in] nTemplateParams Number of template parameters specified.
/// \param[in] nColumnNames Number of column names specified.
/// \throws std::runtime_error if the two numbers differ.
void CheckSnapshot(unsigned int nTemplateParams, unsigned int nColumnNames)
{
   if (nTemplateParams == nColumnNames)
      return;

   throw std::runtime_error("The number of template parameters specified for the snapshot is " +
                            std::to_string(nTemplateParams) + " while " + std::to_string(nColumnNames) +
                            " columns have been specified.");
}
311 
312 /// Choose between local column names or default column names, throw in case of errors.
313 const ColumnNames_t
314 SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
315 {
316  if (names.empty()) {
317  // use default column names
318  if (defaultNames.size() < nRequiredNames)
319  throw std::runtime_error(
320  std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
321  " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
322  // return first nRequiredNames default column names
323  return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames);
324  } else {
325  // use column names provided by the user to this particular transformation/action
326  if (names.size() != nRequiredNames) {
327  auto msg = std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
328  " required but " + std::to_string(names.size()) + (names.size() == 1 ? " was" : " were") +
329  " provided:";
330  for (const auto &name : names)
331  msg += " \"" + name + "\",";
332  msg.back() = '.';
333  throw std::runtime_error(msg);
334  }
335  return names;
336  }
337 }
338 
339 ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, TTree *tree, const ColumnNames_t &definedCols,
340  const ColumnNames_t &dataSourceColumns)
341 {
342  ColumnNames_t unknownColumns;
343  for (auto &column : requiredCols) {
344  const auto isTreeBranch = (tree != nullptr && tree->GetBranch(column.c_str()) != nullptr);
345  if (isTreeBranch)
346  continue;
347  const auto isCustomColumn = std::find(definedCols.begin(), definedCols.end(), column) != definedCols.end();
348  if (isCustomColumn)
349  continue;
350  const auto isDataSourceColumn =
351  std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
352  if (isDataSourceColumn)
353  continue;
354  unknownColumns.emplace_back(column);
355  }
356  return unknownColumns;
357 }
358 
/// Tell whether `colName` follows the naming pattern checked here for internal columns.
/// \param[in] colName The column name to inspect.
/// \return true if `colName` starts with "tdf" and ends with '_', false otherwise.
bool IsInternalColumn(std::string_view colName)
{
   // back() is only evaluated once the prefix matched, i.e. never on an empty name
   return 0 == colName.find("tdf") && '_' == colName.back();
}
363 
364 } // end NS TDF
365 } // end NS Internal
366 } // end NS ROOT
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: TDFUtils.cxx:85
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:32
UInt_t GetImplicitMTPoolSize()
Returns the size of the pool used for implicit multi-threading.
Definition: TROOT.cxx:591
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: TDFUtils.cxx:43
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g. "Float_t") to ROOT type code (e.g. 'F').
Definition: TDFUtils.cxx:185
long long Long64_t
Definition: RtypesCore.h:69
R__EXTERN Int_t gErrorIgnoreLevel
Definition: TError.h:105
basic_string_view< char > string_view
Definition: RStringView.h:35
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, TTree *tree, const ColumnNames_t &definedCols, const ColumnNames_t &dataSourceColumns)
Definition: TDFUtils.cxx:339
TTree()
Default constructor and I/O constructor.
Definition: TTree.cxx:691
virtual const char * GetTypeName() const
Definition: TLeaf.h:82
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const ColumnNames_t &dataSourceColumns)
Definition: TDFUtils.cxx:274
const char * ToConstCharPtr(const std::string &s)
Definition: TDFUtils.cxx:217
bool IsInternalColumn(std::string_view colName)
Definition: TDFUtils.cxx:359
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
Definition: TDFUtils.cxx:314
TDataSource defines an API that TDataFrame can use to read arbitrary data formats.
Definition: TDataSource.hxx:51
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition: TObject.cxx:443
virtual TLeaf * GetLeafCount() const
Definition: TLeaf.h:72
virtual std::string GetTypeName(std::string_view) const =0
Type of a column as a string, e.g.
virtual Int_t GetLenStatic() const
Definition: TLeaf.h:75
A Branch for the case of an object.
virtual const std::type_info & GetTypeId() const =0
unsigned long long ULong64_t
Definition: RtypesCore.h:70
void GetBranchNamesImpl(TTree &t, std::set< std::string > &bNamesReg, ColumnNames_t &bNames, std::set< TTree *> &analysedTrees)
Definition: TDFUtils.cxx:234
static constexpr double s
TObjArray * GetListOfLeaves()
Definition: TBranch.h:195
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2887
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:29
void CheckSnapshot(unsigned int nTemplateParams, unsigned int nColumnNames)
Definition: TDFUtils.cxx:300
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, TCustomColumnBase *tmpBranch, TDataSource *ds)
Return a string containing the type of the given branch.
Definition: TDFUtils.cxx:123
auto * l
Definition: textangle.C:4
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:584
A TFriendElement TF describes a TTree object TF in a file.
unsigned int GetNSlots()
Definition: TDFUtils.cxx:222
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
Int_t GetEntries() const
Return the number of objects in array (i.e. the number of non-empty slots).
Definition: TObjArray.cxx:522
Definition: tree.py:1
virtual bool HasColumn(std::string_view) const =0
Checks if the dataset has a certain column.
A TTree is a list of TBranches.
Definition: TBranch.h:59
ColumnNames_t GetBranchNames(TTree &t)
Get all the branches names, including the ones of the friend trees.
Definition: TDFUtils.cxx:265
char name[80]
Definition: TGX11.cxx:109