Logo ROOT   6.14/05
Reference Guide
RDFUtils.cxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #include <stdexcept>
12 #include <string>
13 #include <typeinfo>
14 
15 #include "RConfigure.h" // R__USE_IMT
16 #include "ROOT/RDFNodes.hxx" // ColumnName2ColumnTypeName -> RCustomColumnBase, FindUnknownColumns -> RLoopManager
17 #include "ROOT/RDataSource.hxx"
18 #include "RtypesCore.h"
19 #include "TBranch.h"
20 #include "TBranchElement.h"
21 #include "TClass.h"
22 #include "TClassEdit.h"
23 #include "TClassRef.h"
24 #include "TLeaf.h"
25 #include "TObjArray.h"
26 #include "TROOT.h" // IsImplicitMTEnabled, GetImplicitMTPoolSize
27 #include "TTree.h"
28 
29 using namespace ROOT::Detail::RDF;
30 using namespace ROOT::RDF;
31 
32 namespace ROOT {
33 namespace Internal {
34 namespace RDF {
35 
36 /// Return the type_info associated to a name. If the association fails, an
37 /// exception is thrown.
38 /// References and pointers are not supported since those cannot be stored in
39 /// columns.
40 const std::type_info &TypeName2TypeID(const std::string &name)
41 {
42  if (auto c = TClass::GetClass(name.c_str())) {
43  return *c->GetTypeInfo();
44  } else if (name == "char" || name == "Char_t")
45  return typeid(char);
46  else if (name == "unsigned char" || name == "UChar_t")
47  return typeid(unsigned char);
48  else if (name == "int" || name == "Int_t")
49  return typeid(int);
50  else if (name == "unsigned int" || name == "UInt_t")
51  return typeid(unsigned int);
52  else if (name == "short" || name == "Short_t")
53  return typeid(short);
54  else if (name == "unsigned short" || name == "UShort_t")
55  return typeid(unsigned short);
56  else if (name == "long" || name == "Long_t")
57  return typeid(long);
58  else if (name == "unsigned long" || name == "ULong_t")
59  return typeid(unsigned long);
60  else if (name == "double" || name == "Double_t")
61  return typeid(double);
62  else if (name == "float" || name == "Float_t")
63  return typeid(float);
64  else if (name == "long long" || name == "long long int" || name == "Long64_t")
65  return typeid(Long64_t);
66  else if (name == "unsigned long long" || name == "unsigned long long int" || name == "ULong64_t")
67  return typeid(ULong64_t);
68  else if (name == "bool" || name == "Bool_t")
69  return typeid(bool);
70  else {
71  std::string msg("Cannot extract type_info of type ");
72  msg += name.c_str();
73  msg += ".";
74  throw std::runtime_error(msg);
75  }
76 }
77 
78 /// Returns the name of a type starting from its type_info
79 /// An empty string is returned in case of failure
80 /// References and pointers are not supported since those cannot be stored in
81 /// columns.
82 std::string TypeID2TypeName(const std::type_info &id)
83 {
84  if (auto c = TClass::GetClass(id)) {
85  return c->GetName();
86  } else if (id == typeid(char))
87  return "char";
88  else if (id == typeid(unsigned char))
89  return "unsigned char";
90  else if (id == typeid(int))
91  return "int";
92  else if (id == typeid(unsigned int))
93  return "unsigned int";
94  else if (id == typeid(short))
95  return "short";
96  else if (id == typeid(unsigned short))
97  return "unsigned short";
98  else if (id == typeid(long))
99  return "long";
100  else if (id == typeid(unsigned long))
101  return "unsigned long";
102  else if (id == typeid(double))
103  return "double";
104  else if (id == typeid(float))
105  return "float";
106  else if (id == typeid(Long64_t))
107  return "Long64_t";
108  else if (id == typeid(ULong64_t))
109  return "ULong64_t";
110  else if (id == typeid(bool))
111  return "bool";
112  else
113  return "";
114 }
115 
/// Build the name of the RVec instantiation holding elements of the given type.
///
/// \param[in] valueType Name of the element type, e.g. "float".
/// \return The string "ROOT::VecOps::RVec<valueType>".
std::string ComposeRVecTypeName(const std::string &valueType)
{
   std::string rvecTypeName("ROOT::VecOps::RVec<");
   rvecTypeName += valueType;
   rvecTypeName += ">";
   return rvecTypeName;
}
120 
121 std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
122 {
123  std::string colType = leaf->GetTypeName();
124  if (colType.empty())
125  throw std::runtime_error("Could not deduce type of leaf " + colName);
126  if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() == 1) {
127  // this is a variable-sized array
128  colType = ComposeRVecTypeName(colType);
129  } else if (leaf->GetLeafCount() == nullptr && leaf->GetLenStatic() > 1) {
130  // this is a fixed-sized array (we do not differentiate between variable- and fixed-sized arrays)
131  colType = ComposeRVecTypeName(colType);
132  } else if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() > 1) {
133  // we do not know how to deal with this branch
134  throw std::runtime_error("TTree leaf " + colName +
135  " has both a leaf count and a static length. This is not supported.");
136  }
137 
138  return colType;
139 }
140 
141 /// Return the typename of object colName stored in t, if any. Return an empty string if colName is not in t.
142 /// Supported cases:
143 /// - leaves corresponding to single values, variable- and fixed-length arrays, with following syntax:
144 /// - "leafname", as long as TTree::GetLeaf resolves it
145 /// - "b1.b2...leafname", as long as TTree::GetLeaf("b1.b2....", "leafname") resolves it
146 /// - TBranchElements, as long as TTree::GetBranch resolves their names
147 std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
148 {
149  // look for TLeaf either with GetLeaf(colName) or with GetLeaf(branchName, leafName) (splitting on last dot)
150  auto leaf = t.GetLeaf(colName.c_str());
151  if (!leaf) {
152  const auto dotPos = colName.find_last_of('.');
153  const auto hasDot = dotPos != std::string::npos;
154  if (hasDot) {
155  const auto branchName = colName.substr(0, dotPos);
156  const auto leafName = colName.substr(dotPos + 1);
157  leaf = t.GetLeaf(branchName.c_str(), leafName.c_str());
158  }
159  }
160  if (leaf)
161  return GetLeafTypeName(leaf, colName);
162 
163  // we could not find a leaf named colName, so we look for a TBranchElement
164  auto branch = t.GetBranch(colName.c_str());
165  if (branch) {
166  static const TClassRef tbranchelement("TBranchElement");
167  if (branch->InheritsFrom(tbranchelement)) {
168  auto be = static_cast<TBranchElement *>(branch);
169  if (auto currentClass = be->GetCurrentClass())
170  return currentClass->GetName();
171  else
172  return be->GetClassName();
173  }
174  }
175 
176  // colName is not a leaf nor a TBranchElement
177  return std::string();
178 }
179 
180 /// Return a string containing the type of the given branch. Works both with real TTree branches and with temporary
181 /// column created by Define. Throws if type name deduction fails.
182 /// Note that for fixed- or variable-sized c-style arrays the returned type name will be RVec<T>.
183 /// vector2tvec specifies whether typename 'std::vector<T>' should be converted to 'RVec<T>' or returned as is
184 std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree,
185  RDataSource *ds, bool isCustomColumn, bool vector2tvec)
186 {
187  std::string colType;
188 
189  if (ds && ds->HasColumn(colName))
190  colType = ds->GetTypeName(colName);
191 
192  if (colType.empty() && tree) {
193  colType = GetBranchOrLeafTypeName(*tree, colName);
194  if (vector2tvec && TClassEdit::IsSTLCont(colType) == ROOT::ESTLType::kSTLvector) {
195  std::vector<std::string> split;
196  int dummy;
197  TClassEdit::GetSplit(colType.c_str(), split, dummy);
198  auto &valueType = split[1];
199  colType = ComposeRVecTypeName(valueType);
200  }
201  }
202 
203  if (colType.empty() && isCustomColumn) {
204  // this must be a temporary branch, we know there is an alias for its type
205  colType = "__tdf" + std::to_string(namespaceID) + "::" + colName + "_type";
206  }
207 
208  if (colType.empty())
209  throw std::runtime_error("Column \"" + colName +
210  "\" is not in a dataset and is not a custom column been defined.");
211 
212  return colType;
213 }
214 
/// Convert type name (e.g. "Float_t") to ROOT type code (e.g. 'F') -- see TBranch documentation.
///
/// Both the ROOT typedefs and the C++ spellings of the fundamental types are recognised. The
/// "long long" spellings are mapped to the same codes as Long64_t/ULong64_t.
///
/// \param[in] b Name of the type.
/// \return The type code, or a space ' ' in case no match was found.
char TypeName2ROOTTypeName(const std::string &b)
{
   struct NameAndCode {
      const char *fName;
      char fCode;
   };
   static const NameAndCode kKnownTypes[] = {
      {"Char_t", 'B'},    {"char", 'B'},
      {"UChar_t", 'b'},   {"unsigned char", 'b'},
      {"Short_t", 'S'},   {"short", 'S'},          {"short int", 'S'},
      {"UShort_t", 's'},  {"unsigned short", 's'}, {"unsigned short int", 's'},
      {"Int_t", 'I'},     {"int", 'I'},
      {"UInt_t", 'i'},    {"unsigned", 'i'},       {"unsigned int", 'i'},
      {"Float_t", 'F'},   {"float", 'F'},
      {"Double_t", 'D'},  {"double", 'D'},
      {"Long64_t", 'L'},  {"long", 'L'},           {"long int", 'L'},
      {"long long", 'L'}, {"long long int", 'L'},
      {"ULong64_t", 'l'}, {"unsigned long", 'l'},  {"unsigned long int", 'l'},
      {"unsigned long long", 'l'}, {"unsigned long long int", 'l'},
      {"Bool_t", 'O'},    {"bool", 'O'},
   };

   for (const auto &known : kKnownTypes) {
      if (b == known.fName)
         return known.fCode;
   }
   return ' ';
}
243 
/// Return the number of processing slots to use.
///
/// The result is 1 unless ROOT was built with implicit multi-threading support (R__USE_IMT)
/// and implicit multi-threading is currently enabled, in which case it is the size of the
/// implicit multi-threading pool.
unsigned int GetNSlots()
{
   unsigned int nSlots = 1;
#ifdef R__USE_IMT
   // only query the pool size when IMT is actually on, so that the sequential case keeps
   // exactly one slot
   if (ROOT::IsImplicitMTEnabled())
      nSlots = ROOT::GetImplicitMTPoolSize();
#endif // R__USE_IMT
   return nSlots;
}
253 
254 /// Replace occurrences of '.' with '_' in each string passed as argument.
255 /// An Info message is printed when this happens. Dots at the end of the string are not replaced.
256 /// An exception is thrown in case the resulting set of strings would contain duplicates.
257 std::vector<std::string> ReplaceDotWithUnderscore(const std::vector<std::string> &columnNames)
258 {
259  auto newColNames = columnNames;
260  for (auto &col : newColNames) {
261  const auto dotPos = col.find('.');
262  if (dotPos != std::string::npos && dotPos != col.size() - 1 && dotPos != 0u) {
263  auto oldName = col;
264  std::replace(col.begin(), col.end(), '.', '_');
265  if (std::find(columnNames.begin(), columnNames.end(), col) != columnNames.end())
266  throw std::runtime_error("Column " + oldName + " would be written as " + col +
267  " but this column already exists. Please use Alias to select a new name for " +
268  oldName);
269  Info("Snapshot", "Column %s will be saved as %s", oldName.c_str(), col.c_str());
270  }
271  }
272 
273  return newColNames;
274 }
275 
276 } // end NS RDF
277 } // end NS Internal
278 } // end NS ROOT
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:32
UInt_t GetImplicitMTPoolSize()
Returns the size of the pool used for implicit multi-threading.
Definition: TROOT.cxx:614
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
long long Long64_t
Definition: RtypesCore.h:69
virtual bool HasColumn(std::string_view) const =0
Checks if the dataset has a certain column.
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.
Definition: TClassEdit.cxx:940
virtual const char * GetTypeName() const
Definition: TLeaf.h:89
unsigned int GetNSlots()
Definition: RDFUtils.cxx:244
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:82
std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
Definition: RDFUtils.cxx:121
virtual std::string GetTypeName(std::string_view) const =0
Type of a column as a string, e.g.
void Info(const char *location, const char *msgfmt,...)
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each string passed as argument.
Definition: RDFUtils.cxx:257
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:217
std::string ComposeRVecTypeName(const std::string &valueType)
Definition: RDFUtils.cxx:116
virtual TLeaf * GetLeafCount() const
If this leaf stores a variable-sized array or a multi-dimensional array whose last dimension has vari...
Definition: TLeaf.h:74
virtual Int_t GetLenStatic() const
Return the fixed length of this leaf.
Definition: TLeaf.h:82
A Branch for the case of an object.
std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree, RDataSource *ds, bool isCustomColumn, bool vector2tvec)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:184
unsigned long long ULong64_t
Definition: RtypesCore.h:70
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: RDFUtils.cxx:40
static RooMathCoreReg dummy
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2887
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:29
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:607
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
Definition: RDFUtils.cxx:147
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
#define c(i)
Definition: RSha256.hxx:101
Definition: tree.py:1
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Definition: RDataSource.hxx:91
char name[80]
Definition: TGX11.cxx:109