Logo ROOT   6.16/01
Reference Guide
RDFUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include "RConfigure.h" // R__USE_IMT
12#include "ROOT/RDataSource.hxx"
14#include "RtypesCore.h"
15#include "TBranch.h"
16#include "TBranchElement.h"
17#include "TClass.h"
18#include "TClassEdit.h"
19#include "TClassRef.h"
20#include "TLeaf.h"
21#include "TObjArray.h"
22#include "TROOT.h" // IsImplicitMTEnabled, GetImplicitMTPoolSize
23#include "TTree.h"
24
25#include <stdexcept>
26#include <string>
27#include <typeinfo>
28
29using namespace ROOT::Detail::RDF;
30using namespace ROOT::RDF;
31
32namespace ROOT {
33namespace Internal {
34namespace RDF {
35
36/// Return the type_info associated to a name. If the association fails, an
37/// exception is thrown.
38/// References and pointers are not supported since those cannot be stored in
39/// columns.
40const std::type_info &TypeName2TypeID(const std::string &name)
41{
42 if (auto c = TClass::GetClass(name.c_str())) {
43 return *c->GetTypeInfo();
44 } else if (name == "char" || name == "Char_t")
45 return typeid(char);
46 else if (name == "unsigned char" || name == "UChar_t")
47 return typeid(unsigned char);
48 else if (name == "int" || name == "Int_t")
49 return typeid(int);
50 else if (name == "unsigned int" || name == "UInt_t")
51 return typeid(unsigned int);
52 else if (name == "short" || name == "Short_t")
53 return typeid(short);
54 else if (name == "unsigned short" || name == "UShort_t")
55 return typeid(unsigned short);
56 else if (name == "long" || name == "Long_t")
57 return typeid(long);
58 else if (name == "unsigned long" || name == "ULong_t")
59 return typeid(unsigned long);
60 else if (name == "double" || name == "Double_t")
61 return typeid(double);
62 else if (name == "float" || name == "Float_t")
63 return typeid(float);
64 else if (name == "long long" || name == "long long int" || name == "Long64_t")
65 return typeid(Long64_t);
66 else if (name == "unsigned long long" || name == "unsigned long long int" || name == "ULong64_t")
67 return typeid(ULong64_t);
68 else if (name == "bool" || name == "Bool_t")
69 return typeid(bool);
70 else {
71 std::string msg("Cannot extract type_info of type ");
72 msg += name.c_str();
73 msg += ".";
74 throw std::runtime_error(msg);
75 }
76}
77
78/// Returns the name of a type starting from its type_info
79/// An empty string is returned in case of failure
80/// References and pointers are not supported since those cannot be stored in
81/// columns.
82std::string TypeID2TypeName(const std::type_info &id)
83{
84 if (auto c = TClass::GetClass(id)) {
85 return c->GetName();
86 } else if (id == typeid(char))
87 return "char";
88 else if (id == typeid(unsigned char))
89 return "unsigned char";
90 else if (id == typeid(int))
91 return "int";
92 else if (id == typeid(unsigned int))
93 return "unsigned int";
94 else if (id == typeid(short))
95 return "short";
96 else if (id == typeid(unsigned short))
97 return "unsigned short";
98 else if (id == typeid(long))
99 return "long";
100 else if (id == typeid(unsigned long))
101 return "unsigned long";
102 else if (id == typeid(double))
103 return "double";
104 else if (id == typeid(float))
105 return "float";
106 else if (id == typeid(Long64_t))
107 return "Long64_t";
108 else if (id == typeid(ULong64_t))
109 return "ULong64_t";
110 else if (id == typeid(bool))
111 return "bool";
112 else
113 return "";
114}
115
/// Build the name of the RVec specialisation that holds elements of type `valueType`.
///
/// \param[in] valueType Name of the element type, inserted verbatim as the template argument.
/// \return The string "ROOT::VecOps::RVec<valueType>".
std::string ComposeRVecTypeName(const std::string &valueType)
{
   std::string rvecName("ROOT::VecOps::RVec<");
   rvecName += valueType;
   rvecName += '>';
   return rvecName;
}
120
121std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
122{
123 std::string colType = leaf->GetTypeName();
124 if (colType.empty())
125 throw std::runtime_error("Could not deduce type of leaf " + colName);
126 if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() == 1) {
127 // this is a variable-sized array
128 colType = ComposeRVecTypeName(colType);
129 } else if (leaf->GetLeafCount() == nullptr && leaf->GetLenStatic() > 1) {
130 // this is a fixed-sized array (we do not differentiate between variable- and fixed-sized arrays)
131 colType = ComposeRVecTypeName(colType);
132 } else if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() > 1) {
133 // we do not know how to deal with this branch
134 throw std::runtime_error("TTree leaf " + colName +
135 " has both a leaf count and a static length. This is not supported.");
136 }
137
138 return colType;
139}
140
141/// Return the typename of object colName stored in t, if any. Return an empty string if colName is not in t.
142/// Supported cases:
143/// - leaves corresponding to single values, variable- and fixed-length arrays, with following syntax:
144/// - "leafname", as long as TTree::GetLeaf resolves it
145/// - "b1.b2...leafname", as long as TTree::GetLeaf("b1.b2....", "leafname") resolves it
146/// - TBranchElements, as long as TTree::GetBranch resolves their names
147std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
148{
149 // look for TLeaf either with GetLeaf(colName) or with GetLeaf(branchName, leafName) (splitting on last dot)
150 auto leaf = t.GetLeaf(colName.c_str());
151 if (!leaf) {
152 const auto dotPos = colName.find_last_of('.');
153 const auto hasDot = dotPos != std::string::npos;
154 if (hasDot) {
155 const auto branchName = colName.substr(0, dotPos);
156 const auto leafName = colName.substr(dotPos + 1);
157 leaf = t.GetLeaf(branchName.c_str(), leafName.c_str());
158 }
159 }
160 if (leaf)
161 return GetLeafTypeName(leaf, colName);
162
163 // we could not find a leaf named colName, so we look for a TBranchElement
164 auto branch = t.GetBranch(colName.c_str());
165 if (branch) {
166 static const TClassRef tbranchelement("TBranchElement");
167 if (branch->InheritsFrom(tbranchelement)) {
168 auto be = static_cast<TBranchElement *>(branch);
169 if (auto currentClass = be->GetCurrentClass())
170 return currentClass->GetName();
171 else
172 return be->GetClassName();
173 }
174 }
175
176 // colName is not a leaf nor a TBranchElement
177 return std::string();
178}
179
180/// Return a string containing the type of the given branch. Works both with real TTree branches and with temporary
181/// column created by Define. Throws if type name deduction fails.
182/// Note that for fixed- or variable-sized c-style arrays the returned type name will be RVec<T>.
183/// vector2tvec specifies whether typename 'std::vector<T>' should be converted to 'RVec<T>' or returned as is
184/// customColID is only used if isCustomColumn is true, and must correspond to the custom column's unique identifier
185/// returned by its `GetID()` method.
186std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree,
187 RDataSource *ds, bool isCustomColumn, bool vector2tvec, unsigned int customColID)
188{
189 std::string colType;
190
191 if (ds && ds->HasColumn(colName))
192 colType = ds->GetTypeName(colName);
193
194 if (colType.empty() && tree) {
195 colType = GetBranchOrLeafTypeName(*tree, colName);
196 if (vector2tvec && TClassEdit::IsSTLCont(colType) == ROOT::ESTLType::kSTLvector) {
197 std::vector<std::string> split;
198 int dummy;
199 TClassEdit::GetSplit(colType.c_str(), split, dummy);
200 auto &valueType = split[1];
201 colType = ComposeRVecTypeName(valueType);
202 }
203 }
204
205 if (colType.empty() && isCustomColumn) {
206 // this must be a temporary branch, we know there is an alias for its type
207 colType = "__tdf" + std::to_string(namespaceID) + "::" + colName + std::to_string(customColID) + "_type";
208 }
209
210 if (colType.empty())
211 throw std::runtime_error("Column \"" + colName +
212 "\" is not in a dataset and is not a custom column been defined.");
213
214 return colType;
215}
216
/// Convert type name (e.g. "Float_t") to ROOT type code (e.g. 'F') -- see TBranch documentation.
///
/// Both the ROOT typedefs and the equivalent C++ spellings are recognised. The 64-bit integer
/// spellings "long long", "long long int", "unsigned long long" and "unsigned long long int" are
/// accepted in addition to "Long64_t"/"ULong64_t", consistently with TypeName2TypeID.
/// "long" and "unsigned long" (and their "... int" forms) map to the 'L' and 'l' codes as well.
///
/// \param[in] b The type name to convert.
/// \return The ROOT type code, or a space ' ' in case no match was found.
char TypeName2ROOTTypeName(const std::string &b)
{
   struct NameAndCode {
      const char *fName;
      char fCode;
   };
   static const NameAndCode knownTypes[] = {{"Char_t", 'B'},
                                            {"char", 'B'},
                                            {"UChar_t", 'b'},
                                            {"unsigned char", 'b'},
                                            {"Short_t", 'S'},
                                            {"short", 'S'},
                                            {"short int", 'S'},
                                            {"UShort_t", 's'},
                                            {"unsigned short", 's'},
                                            {"unsigned short int", 's'},
                                            {"Int_t", 'I'},
                                            {"int", 'I'},
                                            {"UInt_t", 'i'},
                                            {"unsigned", 'i'},
                                            {"unsigned int", 'i'},
                                            {"Float_t", 'F'},
                                            {"float", 'F'},
                                            {"Double_t", 'D'},
                                            {"double", 'D'},
                                            {"Long64_t", 'L'},
                                            {"long long", 'L'},
                                            {"long long int", 'L'},
                                            {"long", 'L'},
                                            {"long int", 'L'},
                                            {"ULong64_t", 'l'},
                                            {"unsigned long long", 'l'},
                                            {"unsigned long long int", 'l'},
                                            {"unsigned long", 'l'},
                                            {"unsigned long int", 'l'},
                                            {"Bool_t", 'O'},
                                            {"bool", 'O'}};

   for (const auto &entry : knownTypes) {
      if (b == entry.fName)
         return entry.fCode;
   }

   // no match
   return ' ';
}
245
/// Return the number of processing slots to use.
///
/// \return 1 if ROOT was built without implicit multi-threading support (R__USE_IMT not defined)
///         or if implicit multi-threading is not enabled; otherwise the size of the implicit
///         multi-threading pool.
unsigned int GetNSlots()
{
   unsigned int nSlots = 1;
#ifdef R__USE_IMT
   // Without this query the function would report a single slot even when implicit MT is on.
   if (ROOT::IsImplicitMTEnabled())
      nSlots = ROOT::GetImplicitMTPoolSize();
#endif // R__USE_IMT
   return nSlots;
}
255
256/// Replace occurrences of '.' with '_' in each string passed as argument.
257/// An Info message is printed when this happens. Dots at the end of the string are not replaced.
258/// An exception is thrown in case the resulting set of strings would contain duplicates.
259std::vector<std::string> ReplaceDotWithUnderscore(const std::vector<std::string> &columnNames)
260{
261 auto newColNames = columnNames;
262 for (auto &col : newColNames) {
263 const auto dotPos = col.find('.');
264 if (dotPos != std::string::npos && dotPos != col.size() - 1 && dotPos != 0u) {
265 auto oldName = col;
266 std::replace(col.begin(), col.end(), '.', '_');
267 if (std::find(columnNames.begin(), columnNames.end(), col) != columnNames.end())
268 throw std::runtime_error("Column " + oldName + " would be written as " + col +
269 " but this column already exists. Please use Alias to select a new name for " +
270 oldName);
271 Info("Snapshot", "Column %s will be saved as %s", oldName.c_str(), col.c_str());
272 }
273 }
274
275 return newColNames;
276}
277
278} // end NS RDF
279} // end NS Internal
280} // end NS ROOT
#define b(i)
Definition: RSha256.hxx:100
#define c(i)
Definition: RSha256.hxx:101
static RooMathCoreReg dummy
long long Long64_t
Definition: RtypesCore.h:69
unsigned long long ULong64_t
Definition: RtypesCore.h:70
void Info(const char *location, const char *msgfmt,...)
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view) const =0
Checks if the dataset has a certain column.
virtual std::string GetTypeName(std::string_view) const =0
Type of a column as a string, e.g.
A Branch for the case of an object.
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:29
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2885
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:32
virtual const char * GetTypeName() const
Definition: TLeaf.h:93
virtual TLeaf * GetLeafCount() const
If this leaf stores a variable-sized array or a multi-dimensional array whose last dimension has vari...
Definition: TLeaf.h:78
virtual Int_t GetLenStatic() const
Return the fixed length of this leaf.
Definition: TLeaf.h:86
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition: RDFUtils.cxx:259
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: RDFUtils.cxx:40
unsigned int GetNSlots()
Definition: RDFUtils.cxx:246
std::string ComposeRVecTypeName(const std::string &valueType)
Definition: RDFUtils.cxx:116
std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
Definition: RDFUtils.cxx:121
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:219
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:82
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
Definition: RDFUtils.cxx:147
std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree, RDataSource *ds, bool isCustomColumn, bool vector2tvec, unsigned int customColID)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:186
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
@ kSTLvector
Definition: ESTLType.h:30
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:607
UInt_t GetImplicitMTPoolSize()
Returns the size of the pool used for implicit multi-threading.
Definition: TROOT.cxx:614
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.
Definition: TClassEdit.cxx:948
Definition: tree.py:1