11#include "RConfigure.h"
32#include <nlohmann/json.hpp>
49using TypeInfoRef = std::reference_wrapper<const std::type_info>;
50struct TypeInfoRefHash {
54struct TypeInfoRefEqualComp {
65 static unsigned int nThread = 1;
81 {
"short",
typeid(short)},
82 {
"short int",
typeid(
short int)},
83 {
"signed short",
typeid(
signed short)},
84 {
"signed short int",
typeid(
signed short int)},
85 {
"unsigned short",
typeid(
unsigned short)},
86 {
"unsigned short int",
typeid(
unsigned short int)},
88 {
"signed",
typeid(
signed)},
89 {
"signed int",
typeid(
signed int)},
90 {
"unsigned",
typeid(unsigned)},
91 {
"unsigned int",
typeid(
unsigned int)},
92 {
"long",
typeid(long)},
93 {
"long int",
typeid(
long int)},
94 {
"signed long",
typeid(
signed long)},
95 {
"signed long int",
typeid(
signed long int)},
96 {
"unsigned long",
typeid(
unsigned long)},
97 {
"unsigned long int",
typeid(
unsigned long int)},
98 {
"long long",
typeid(
long long)},
99 {
"long long int",
typeid(
long long int)},
100 {
"signed long long",
typeid(
signed long long)},
101 {
"signed long long int",
typeid(
signed long long int)},
102 {
"unsigned long long",
typeid(
unsigned long long)},
103 {
"unsigned long long int",
typeid(
unsigned long long int)},
104 {
"std::size_t",
typeid(std::size_t)},
107 {
"std::int8_t",
typeid(std::int8_t)},
110 {
"std::int16_t",
typeid(std::int16_t)},
113 {
"std::int32_t",
typeid(std::int32_t)},
116 {
"std::int64_t",
typeid(std::int64_t)},
119 {
"std::uint8_t",
typeid(std::uint8_t)},
122 {
"std::uint16_t",
typeid(std::uint16_t)},
125 {
"std::uint32_t",
typeid(std::uint32_t)},
128 {
"std::uint64_t",
typeid(std::uint64_t)},
131 {
"Int_t",
typeid(
Int_t)},
132 {
"UInt_t",
typeid(
UInt_t)},
135 {
"Long_t",
typeid(
Long_t)},
140 {
"bool",
typeid(
bool)},
141 {
"Bool_t",
typeid(
bool)},
143 {
"char",
typeid(char)},
144 {
"Char_t",
typeid(char)},
145 {
"signed char",
typeid(
signed char)},
146 {
"unsigned char",
typeid(
unsigned char)},
147 {
"UChar_t",
typeid(
unsigned char)},
152 {
"float",
typeid(float)},
153 {
"double",
typeid(
double)},
154 {
"long double",
typeid(
long double)},
156 {
"Float_t",
typeid(float)},
157 {
"Double_t",
typeid(
double)}};
160 return it->second.get();
163 if (!
c->GetTypeInfo()) {
164 throw std::runtime_error(
"Cannot extract type_info of type " +
name +
".");
166 return *
c->GetTypeInfo();
169 throw std::runtime_error(
"Cannot extract type_info of type " +
name +
".");
180 const static std::unordered_map<TypeInfoRef, std::string, TypeInfoRefHash, TypeInfoRefEqualComp>
typeID2TypeNameMap{
181 {
typeid(char),
"char"}, {
typeid(
unsigned char),
"unsigned char"},
182 {
typeid(
int),
"int"}, {
typeid(
unsigned int),
"unsigned int"},
183 {
typeid(short),
"short"}, {
typeid(
unsigned short),
"unsigned short"},
184 {
typeid(long),
"long"}, {
typeid(
unsigned long),
"unsigned long"},
185 {
typeid(
double),
"double"}, {
typeid(float),
"float"},
187 {
typeid(
bool),
"bool"}};
201 const static std::unordered_map<TypeInfoRef, char, TypeInfoRefHash, TypeInfoRefEqualComp>
typeID2ROOTTypeNameMap{
202 {
typeid(char),
'B'}, {
typeid(
Char_t),
'B'}, {
typeid(
unsigned char),
'b'}, {
typeid(
UChar_t),
'b'},
203 {
typeid(
int),
'I'}, {
typeid(
Int_t),
'I'}, {
typeid(
unsigned int),
'i'}, {
typeid(
UInt_t),
'i'},
204 {
typeid(short),
'S'}, {
typeid(
Short_t),
'S'}, {
typeid(
unsigned short),
's'}, {
typeid(
UShort_t),
's'},
205 {
typeid(long),
'G'}, {
typeid(
Long_t),
'G'}, {
typeid(
unsigned long),
'g'}, {
typeid(
ULong_t),
'g'},
206 {
typeid(
long long),
'L'}, {
typeid(
Long64_t),
'L'}, {
typeid(
unsigned long long),
'l'}, {
typeid(
ULong64_t),
'l'},
218 return "ROOT::VecOps::RVec<" +
valueType +
">";
226 throw std::runtime_error(
"Could not deduce type of leaf " +
colName);
227 if (
leaf->GetLeafCount() !=
nullptr &&
leaf->GetLenStatic() == 1) {
230 }
else if (
leaf->GetLeafCount() ==
nullptr &&
leaf->GetLenStatic() > 1) {
233 }
else if (
leaf->GetLeafCount() !=
nullptr &&
leaf->GetLenStatic() > 1) {
235 throw std::runtime_error(
"TTree leaf " +
colName +
236 " has both a leaf count and a static length. This is not supported.");
285 return be->GetTypeName();
287 return be->GetClassName();
292 leaf =
static_cast<TLeaf *
>(
branch->GetListOfLeaves()->UncheckedAt(0));
298 return std::string();
318 std::vector<std::string> split;
327 throw std::runtime_error(
"Column \"" +
colName +
328 "\" is not in a dataset and is not a custom column been defined.");
339 {
"unsigned char",
'b'},
344 {
"unsigned int",
'i'},
349 {
"unsigned short",
's'},
350 {
"unsigned short int",
's'},
355 {
"unsigned long",
'g'},
356 {
"unsigned long int",
'g'},
363 {
"long long int",
'L'},
365 {
"unsigned long long",
'l'},
366 {
"unsigned long long int",
'l'},
394 const auto dotPos = col.find(
'.');
397 std::replace(col.begin(), col.end(),
'.',
'_');
399 throw std::runtime_error(
"Column " +
oldName +
" would be written as " + col +
400 " but this column already exists. Please use Alias to select a new name for " +
402 Info(
"Snapshot",
"Column %s will be saved as %s",
oldName.c_str(), col.c_str());
415 "\nRDataFrame: An error occurred during just-in-time compilation. The lines above might indicate the cause of "
416 "the crash\n All RDF objects that have not run an event loop yet should be considered in an invalid state.\n";
417 throw std::runtime_error(
msg);
433 std::string
msg =
"\nAn error occurred during just-in-time compilation";
434 if (!context.empty())
435 msg +=
" in " + context;
437 ". The lines above might indicate the cause of the crash\nAll RDF objects that have not run their event "
438 "loop yet should be considered in an invalid state.\n";
439 throw std::runtime_error(
msg);
448 for (std::size_t i = 0
u; i < 1000u &&
substr_end != std::string::npos; ++i) {
460 const auto str =
colName.data();
462 (
'r' == str[0] ||
't' == str[0]) &&
463 0 ==
strncmp(
"df", str + 1, 2);
470 for (
const auto&
name : names) {
497 std::string
errMsg =
"RDataFrame: type mismatch: column \"" +
colName +
"\" is being used as ";
500 errMsg +=
" (extracted from type info)";
504 errMsg +=
" but the Define or Vary node advertises it as ";
508 errMsg +=
" (extracted from type info)";
512 throw std::runtime_error(
errMsg);
518 return std::find(
vec.cbegin(),
vec.cend(), str) !=
vec.cend();
524 std::shared_lock
l{fMutex};
525 if (
auto it = fStrings.find(
string); it != fStrings.end())
531 std::unique_lock
l{fMutex};
532 if (
auto it = fStrings.find(
string); it != fStrings.end())
535 return fStrings.insert(
string).first;
540 const nlohmann::ordered_json
fullData = nlohmann::ordered_json::parse(std::ifstream(
jsonFile));
542 throw std::runtime_error(
543 R
"(The input specification does not contain any samples. Please provide the samples in the specification like:
547 "trees": ["tree1", "tree2"],
548 "files": ["file1.root", "file2.root"],
549 "metadata": {"lumi": 1.0, }
552 "trees": ["tree3", "tree4"],
553 "files": ["file3.root", "file4.root"],
554 "metadata": {"lumi": 0.5, }
567 if (!
sample.contains(
"trees")) {
568 throw std::runtime_error(
"A list of tree names must be provided for sample " +
sampleName +
".");
571 if (!
sample.contains(
"files")) {
572 throw std::runtime_error(
"A list of files must be provided for sample " +
sampleName +
".");
575 if (!
sample.contains(
"metadata")) {
579 for (
const auto &metadata :
sample[
"metadata"].items()) {
580 const auto &val = metadata.value();
582 m.Add(metadata.key(), val.get<std::string>());
583 else if (val.is_number_integer())
584 m.Add(metadata.key(), val.get<
int>());
585 else if (val.is_number_float())
586 m.Add(metadata.key(), val.get<
double>());
588 throw std::logic_error(
"The metadata keys can only be of type [string|int|double].");
596 std::vector<std::string>
trees =
friends.value()[
"trees"];
597 std::vector<std::string>
files =
friends.value()[
"files"];
599 throw std::runtime_error(
"Mismatch between trees and files in a friend.");
607 if (
range.size() == 1)
609 else if (
range.size() == 2)
627 return df.GetTopLevelFieldNames();
632 return df.GetColumnNamesNoDuplicates();
643 return ds.DescribeDataset();
648 const std::unordered_map<std::string, ROOT::RDF::Experimental::RSample *> &
sampleMap)
663std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
672 return std::move(
spec.fSamples);
#define R__LOG_DEBUG(DEBUGLEVEL,...)
unsigned long long ULong64_t
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
TRObject operator()(const T1 &t1) const
std::string GetTypeName() const
The head node of a RDF computation graph.
auto Insert(const std::string &string) -> decltype(fStrings)::const_iterator
Inserts the input string in the cache and returns an iterator to the cached string.
The dataset specification for RDataFrame.
Class representing a sample which is a grouping of trees and their fileglobs, and,...
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
A log configuration for a channel, e.g.
const_iterator begin() const
const_iterator end() const
A Branch for the case of an object.
TClassRef is used to implement a permanent reference to a TClass object.
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
A TTree represents a columnar dataset.
virtual TBranch * FindBranch(const char *name)
Return the branch that corresponds to the path 'branchname', which can include the name of the tree or...
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
virtual TLeaf * GetLeaf(const char *branchname, const char *leafname)
Return pointer to the 1st Leaf named name in any Branch of this Tree or any branch in the list of fri...
virtual TLeaf * FindLeaf(const char *name)
Find leaf.
ROOT::RLogChannel & RDFLogChannel()
void RunFinalChecks(const ROOT::RDF::RDataSource &ds, bool nodesLeftNotRun)
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
ROOT::RDF::RSampleInfo CreateSampleInfo(const ROOT::RDF::RDataSource &ds, unsigned int slot, const std::unordered_map< std::string, ROOT::RDF::Experimental::RSample * > &sampleMap)
ROOT::RDF::Experimental::RDatasetSpec RetrieveSpecFromJson(const std::string &jsonFile)
Function to retrieve RDatasetSpec from JSON file provided.
std::string ComposeRVecTypeName(const std::string &valueType)
void CallInitializeWithOpts(ROOT::RDF::RDataSource &ds, const std::set< std::string > &suppressErrorsForMissingColumns)
std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
const std::vector< std::string > & GetTopLevelFieldNames(const ROOT::RDF::RDataSource &ds)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
Return the typename of object colName stored in t, if any.
std::string DescribeDataset(ROOT::RDF::RDataSource &ds)
std::unique_ptr< ROOT::Detail::RDF::RColumnReaderBase > CreateColumnReader(ROOT::RDF::RDataSource &ds, unsigned int slot, std::string_view col, const std::type_info &tid, TTreeReader *treeReader)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
void InterpreterCalc(const std::string &code, const std::string &context="")
Jit code in the interpreter with TInterpreter::Calc, throw in case of errors.
void CheckReaderTypeMatches(const std::type_info &colType, const std::type_info &requestedType, const std::string &colName)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
std::vector< ROOT::RDF::Experimental::RSample > MoveOutSamples(ROOT::RDF::Experimental::RDatasetSpec &spec)
void ProcessMT(ROOT::RDF::RDataSource &ds, ROOT::Detail::RDF::RLoopManager &lm)
std::string GetTypeNameWithOpts(const ROOT::RDF::RDataSource &ds, std::string_view colName, bool vector2RVec)
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
const std::vector< std::string > & GetColumnNamesNoDuplicates(const ROOT::RDF::RDataSource &ds)
unsigned int & NThreadPerTH3()
Obtain or set the number of threads that will share a clone of a thread-safe 3D histogram.
char TypeID2ROOTTypeName(const std::type_info &tid)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
UInt_t GetThreadPoolSize()
Returns the size of ROOT's thread pool.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
int GetSplit(const char *type, std::vector< std::string > &output, int &nestedLoc, EModType mode=TClassEdit::kNone)
Stores in output (after emptying it) the split type.