Logo ROOT  
Reference Guide
RDFInterfaceUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12#include <ROOT/RDataFrame.hxx>
13#include <ROOT/RStringView.hxx>
14#include <ROOT/TSeq.hxx>
15#include <RtypesCore.h>
16#include <TDirectory.h>
17#include <TChain.h>
18#include <TClass.h>
19#include <TClassEdit.h>
20#include <TFriendElement.h>
21#include <TInterpreter.h>
22#include <TObject.h>
23#include <TPRegexp.h>
24#include <TString.h>
25#include <TTree.h>
26
27// pragma to disable warnings on Rcpp which have
28// so many noise compiling
29#if defined(__GNUC__)
30#pragma GCC diagnostic push
31#pragma GCC diagnostic ignored "-Woverloaded-virtual"
32#pragma GCC diagnostic ignored "-Wshadow"
33#endif
34#include "lexertk.hpp"
35#if defined(__GNUC__)
36#pragma GCC diagnostic pop
37#endif
38
39#include <algorithm>
40#include <unordered_set>
41#include <stdexcept>
42#include <string>
43#include <sstream>
44#include <typeinfo>
45
46namespace ROOT {
47namespace Detail {
48namespace RDF {
49class RDefineBase;
50class RFilterBase;
51class RLoopManager;
52class RRangeBase;
53} // namespace RDF
54} // namespace Detail
55
56namespace RDF {
57class RDataSource;
58} // namespace RDF
59
60} // namespace ROOT
61
62namespace {
65
66/// A string expression such as those passed to Filter and Define, digested to a standardized form
67struct ParsedExpression {
68 /// The string expression with the dummy variable names in fVarNames in place of the original column names
69 std::string fExpr;
70 /// The list of valid column names that were used in the original string expression.
71 /// Duplicates are removed and column aliases (created with Alias calls) are resolved.
72 ColumnNames_t fUsedCols;
73 /// The list of variable names used in fExpr, with same ordering and size as fUsedCols
74 ColumnNames_t fVarNames;
75};
76
77// look at expression `expr` and return a list of column names used, including aliases
78static ColumnNames_t FindUsedColumns(const std::string &expr, const ColumnNames_t &treeBranchNames,
79 const ROOT::Internal::RDF::RColumnRegister &customColumns,
80 const ColumnNames_t &dataSourceColNames)
81{
82 ColumnNames_t usedCols;
83
84 lexertk::generator tokens;
85 const auto tokensOk = tokens.process(expr);
86 if (!tokensOk) {
87 const auto msg = "Failed to tokenize expression:\n" + expr + "\n\nMake sure it is valid C++.";
88 throw std::runtime_error(msg);
89 }
90
91 // iterate over tokens in expression and fill usedCols, varNames and exprWithVars
92 const auto nTokens = tokens.size();
93 const auto kSymbol = lexertk::token::e_symbol;
94 for (auto i = 0u; i < nTokens; ++i) {
95 const auto &tok = tokens[i];
96 // lexertk classifies '&' as e_symbol for some reason
97 if (tok.type != kSymbol || tok.value == "&" || tok.value == "|") {
98 // token is not a potential variable name, skip it
99 continue;
100 }
101
102 ColumnNames_t potentialColNames({tok.value});
103
104 // if token is the start of a dot chain (a.b.c...), a.b, a.b.c etc. are also potential column names
105 auto dotChainKeepsGoing = [&](unsigned int _i) {
106 return _i + 2 <= nTokens && tokens[_i + 1].value == "." && tokens[_i + 2].type == kSymbol;
107 };
108 while (dotChainKeepsGoing(i)) {
109 potentialColNames.emplace_back(potentialColNames.back() + "." + tokens[i + 2].value);
110 i += 2; // consume the tokens we looked at
111 }
112
113 // find the longest potential column name that is an actual column name
114 // if it's a new match, also add it to usedCols and update varNames
115 // potential columns are sorted by length, so we search from the end
116 auto isRDFColumn = [&](const std::string &columnOrAlias) {
117 const auto &col = customColumns.ResolveAlias(columnOrAlias);
118 if (customColumns.HasName(col) || IsStrInVec(col, treeBranchNames) || IsStrInVec(col, dataSourceColNames))
119 return true;
120 return false;
121 };
122 const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
123
124 if (longestRDFColMatch != potentialColNames.crend() && !IsStrInVec(*longestRDFColMatch, usedCols)) {
125 // found a new RDF column in the expression (potentially an alias)
126 usedCols.emplace_back(*longestRDFColMatch);
127 }
128 }
129
130 return usedCols;
131}
132
133static ParsedExpression ParseRDFExpression(std::string_view expr, const ColumnNames_t &treeBranchNames,
134 const ROOT::Internal::RDF::RColumnRegister &customColumns,
135 const ColumnNames_t &dataSourceColNames)
136{
137 // transform `#var` into `R_rdf_sizeof_var`
138 TString preProcessedExpr(expr);
139 // match #varname at beginning of the sentence or after not-a-word, but exclude preprocessor directives like #ifdef
140 TPRegexp colSizeReplacer(
141 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
142 colSizeReplacer.Substitute(preProcessedExpr, "$1R_rdf_sizeof_$3", "g");
143
144 const auto usedColsAndAliases =
145 FindUsedColumns(std::string(preProcessedExpr), treeBranchNames, customColumns, dataSourceColNames);
146
147 auto escapeDots = [](const std::string &s) {
148 TString ss(s);
149 TPRegexp dot("\\.");
150 dot.Substitute(ss, "\\.", "g");
151 return std::string(std::move(ss));
152 };
153
154 ColumnNames_t varNames;
155 ColumnNames_t usedCols;
156 // when we are done, exprWithVars willl be the same as preProcessedExpr but column names will be substituted with
157 // the dummy variable names in varNames
158 TString exprWithVars(preProcessedExpr);
159 for (const auto &colOrAlias : usedColsAndAliases) {
160 const auto col = customColumns.ResolveAlias(colOrAlias);
161 unsigned int varIdx; // index of the variable in varName corresponding to col
162 if (!IsStrInVec(col, usedCols)) {
163 usedCols.emplace_back(col);
164 varIdx = varNames.size();
165 varNames.emplace_back("var" + std::to_string(varIdx));
166 } else {
167 // colOrAlias must be an alias that resolves to a column we have already seen.
168 // Find back the corresponding varName
169 varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
170 }
171 TPRegexp replacer("\\b" + escapeDots(colOrAlias) + "\\b"); // watch out: need to replace colOrAlias, not col
172 replacer.Substitute(exprWithVars, varNames[varIdx], "g");
173 }
174
175 return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
176}
177
/// Give access to the static map that records which Filter/Define lambda expressions have been jitted.
/// The map is consulted to find out whether an expression was already jitted and, if so,
/// under which variable name.
/// A key is the full text of a lambda expression, the mapped value is the name of the jitted
/// variable holding it. For instance, given
///     auto lambda1 = [] { return 42; };
/// the key is "[] { return 42; }" and the value is "lambda1".
static std::unordered_map<std::string, std::string> &GetJittedExprs()
{
   static std::unordered_map<std::string, std::string> sJittedExprRegistry;
   return sJittedExprRegistry;
}
189
190static std::string
191BuildLambdaString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
192{
193 assert(vars.size() == varTypes.size());
194
195 TPRegexp re(R"(\breturn\b)");
196 const bool hasReturnStmt = re.MatchB(expr);
197
198 static const std::vector<std::string> fundamentalTypes = {
199 "int",
200 "signed",
201 "signed int",
202 "Int_t",
203 "unsigned",
204 "unsigned int",
205 "UInt_t",
206 "double",
207 "Double_t",
208 "float",
209 "Float_t",
210 "char",
211 "Char_t",
212 "unsigned char",
213 "UChar_t",
214 "bool",
215 "Bool_t",
216 "short",
217 "short int",
218 "Short_t",
219 "long",
220 "long int",
221 "long long int",
222 "Long64_t",
223 "unsigned long",
224 "unsigned long int",
225 "ULong64_t",
226 "std::size_t",
227 "size_t",
228 "Ssiz_t"
229 };
230
231 std::stringstream ss;
232 ss << "[](";
233 for (auto i = 0u; i < vars.size(); ++i) {
234 std::string fullType;
235 const auto &type = varTypes[i];
236 if (std::find(fundamentalTypes.begin(), fundamentalTypes.end(), type) != fundamentalTypes.end()) {
237 // pass it by const value to help detect common mistakes such as if(x = 3)
238 fullType = "const " + type + " ";
239 } else {
240 // We pass by reference to avoid expensive copies
241 // It can't be const reference in general, as users might want/need to call non-const methods on the values
242 fullType = type + "& ";
243 }
244 ss << fullType << vars[i] << ", ";
245 }
246 if (!vars.empty())
247 ss.seekp(-2, ss.cur);
248
249 if (hasReturnStmt)
250 ss << "){";
251 else
252 ss << "){return ";
253 ss << expr << "\n;}";
254
255 return ss.str();
256}
257
258/// Declare a lambda expression to the interpreter in namespace R_rdf, return the name of the jitted lambda.
259/// If the lambda expression is already in GetJittedExprs, return the name for the lambda that has already been jitted.
260static std::string DeclareLambda(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
261{
263
264 const auto lambdaExpr = BuildLambdaString(expr, vars, varTypes);
265 auto &exprMap = GetJittedExprs();
266 const auto exprIt = exprMap.find(lambdaExpr);
267 if (exprIt != exprMap.end()) {
268 // expression already there
269 const auto lambdaName = exprIt->second;
270 return lambdaName;
271 }
272
273 // new expression
274 const auto lambdaBaseName = "lambda" + std::to_string(exprMap.size());
275 const auto lambdaFullName = "R_rdf::" + lambdaBaseName;
276
277 const auto toDeclare = "namespace R_rdf {\nauto " + lambdaBaseName + " = " + lambdaExpr + ";\nusing " +
278 lambdaBaseName + "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" +
279 lambdaBaseName + ")>::ret_type;\n}";
281
282 // InterpreterDeclare could throw. If it doesn't, mark the lambda as already jitted
283 exprMap.insert({lambdaExpr, lambdaFullName});
284
285 return lambdaFullName;
286}
287
288/// Each jitted lambda comes with a lambda_ret_t type alias for its return type.
289/// Resolve that alias and return the true type as string.
290static std::string RetTypeOfLambda(const std::string &lambdaName)
291{
292 const auto dt = gROOT->GetType((lambdaName + "_ret_t").c_str());
293 R__ASSERT(dt != nullptr);
294 const auto type = dt->GetFullTypeName();
295 return type;
296}
297
298static void GetTopLevelBranchNamesImpl(TTree &t, std::set<std::string> &bNamesReg, ColumnNames_t &bNames,
299 std::set<TTree *> &analysedTrees, const std::string friendName = "")
300{
301 if (!analysedTrees.insert(&t).second) {
302 return;
303 }
304
305 auto branches = t.GetListOfBranches();
306 if (branches) {
307 for (auto branchObj : *branches) {
308 const auto name = branchObj->GetName();
309 if (bNamesReg.insert(name).second) {
310 bNames.emplace_back(name);
311 } else if (!friendName.empty()) {
312 // If this is a friend and the branch name has already been inserted, it might be because the friend
313 // has a branch with the same name as a branch in the main tree. Let's add it as <friendname>.<branchname>.
314 // If used for a Snapshot, this name will become <friendname>_<branchname> (with an underscore).
315 const auto longName = friendName + "." + name;
316 if (bNamesReg.insert(longName).second)
317 bNames.emplace_back(longName);
318 }
319 }
320 }
321
322 auto friendTrees = t.GetListOfFriends();
323
324 if (!friendTrees)
325 return;
326
327 for (auto friendTreeObj : *friendTrees) {
328 auto friendElement = static_cast<TFriendElement *>(friendTreeObj);
329 auto friendTree = friendElement->GetTree();
330 const std::string frName(friendElement->GetName()); // this gets us the TTree name or the friend alias if any
331 GetTopLevelBranchNamesImpl(*friendTree, bNamesReg, bNames, analysedTrees, frName);
332 }
333}
334
335[[noreturn]] void
336ThrowJitBuildActionHelperTypeError(const std::string &actionTypeNameBase, const std::type_info &helperArgType)
337{
338 int err = 0;
339 const char *cname = TClassEdit::DemangleTypeIdName(helperArgType, err);
340 std::string actionHelperTypeName = cname;
341 delete[] cname;
342 if (err != 0)
343 actionHelperTypeName = helperArgType.name();
344
345 std::string exceptionText =
346 "RDataFrame::Jit: cannot just-in-time compile a \"" + actionTypeNameBase + "\" action using helper type \"" +
347 actionHelperTypeName +
348 "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have "
349 "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action "
350 "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. "
351 "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the "
352 "interpreter, e.g. via gInterpreter->Declare.";
353
354 throw std::runtime_error(exceptionText);
355}
356
357} // anonymous namespace
358
359namespace ROOT {
360namespace Internal {
361namespace RDF {
362
363/// Take a list of column names, return that list with entries starting by '#' filtered out.
364/// The function throws when filtering out a column this way.
365ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
366{
367 ColumnNames_t columnListWithoutSizeColumns;
368 ColumnNames_t filteredColumns;
369 std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
370 [&](const std::string &name) {
371 if (name[0] == '#') {
372 filteredColumns.emplace_back(name);
373 return false;
374 } else {
375 return true;
376 }
377 });
378
379 if (!filteredColumns.empty()) {
380 std::string msg = "Column name(s) {";
381 for (auto &c : filteredColumns)
382 msg += c + ", ";
383 msg[msg.size() - 2] = '}';
384 msg += "will be ignored. Please go through a valid Alias to " + action + " an array size column";
385 throw std::runtime_error(msg);
386 }
387
388 return columnListWithoutSizeColumns;
389}
390
/// Translate the name `col` into the column name it stands for.
/// If `col` is a key of `aliasMap`, the mapped value is returned. Otherwise, a name of the form `#var`
/// is turned into `R_rdf_sizeof_var`. Any other name is returned unchanged.
std::string ResolveAlias(const std::string &col, const std::map<std::string, std::string> &aliasMap)
{
   const auto aliasIt = aliasMap.find(col);
   if (aliasIt != aliasMap.end())
      return aliasIt->second;

   // #var is an alias for R_rdf_sizeof_var
   const bool isSizeShorthand = col.size() > 1 && col[0] == '#';
   return isSizeShorthand ? "R_rdf_sizeof_" + col.substr(1) : col;
}
403
/// Check that `var` is a valid C++ variable name, throw otherwise.
///
/// A name is valid if it is not empty, starts with an ASCII letter or an underscore and contains only
/// ASCII letters, digits and underscores.
///
/// \param var    the candidate name
/// \param where  name of the calling RDataFrame method, used in the error message
/// \throws std::runtime_error if `var` is not a valid name
void CheckValidCppVarName(std::string_view var, const std::string &where)
{
   const auto isALetter = [](char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); };
   const auto isANumber = [](char c) { return c >= '0' && c <= '9'; };
   const auto isValidTok = [&isALetter, &isANumber](char c) { return c == '_' || isALetter(c) || isANumber(c); };

   // the emptiness check comes first: reading the first character of an empty view is out of bounds
   const bool isValid = !var.empty() && (var.front() == '_' || isALetter(var.front())) &&
                        std::all_of(var.begin(), var.end(), isValidTok);

   if (!isValid) {
      const auto error =
         "RDataFrame::" + where + ": cannot define column \"" + std::string(var) + "\". Not a valid C++ variable name.";
      throw std::runtime_error(error);
   }
}
431
432///////////////////////////////////////////////////////////////////////////////
433/// Get all the top-level branches names, including the ones of the friend trees
435{
436 std::set<std::string> bNamesSet;
437 ColumnNames_t bNames;
438 std::set<TTree *> analysedTrees;
439 GetTopLevelBranchNamesImpl(t, bNamesSet, bNames, analysedTrees);
440 return bNames;
441}
442
443std::string DemangleTypeIdName(const std::type_info &typeInfo)
444{
445 int dummy(0);
446 char *tn = TClassEdit::DemangleTypeIdName(typeInfo, dummy);
447 std::string tname(tn);
448 free(tn);
449 return tname;
450}
451
453ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
454{
455 const auto theRegexSize = columnNameRegexp.size();
456 std::string theRegex(columnNameRegexp);
457
458 const auto isEmptyRegex = 0 == theRegexSize;
459 // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
460 if (theRegexSize > 0 && theRegex[0] != '^')
461 theRegex = "^" + theRegex;
462 if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
463 theRegex = theRegex + "$";
464
465 ColumnNames_t selectedColumns;
466
467 // Since we support gcc48 and it does not provide in its stl std::regex,
468 // we need to use TPRegexp
469 TPRegexp regexp(theRegex);
470 for (auto &&colName : colNames) {
471 if ((isEmptyRegex || regexp.MatchB(colName.c_str())) && !IsInternalColumn(colName)) {
472 selectedColumns.emplace_back(colName);
473 }
474 }
475
476 if (selectedColumns.empty()) {
477 std::string text(callerName);
478 if (columnNameRegexp.empty()) {
479 text = ": there is no column available to match.";
480 } else {
481 text = ": regex \"" + std::string(columnNameRegexp) + "\" did not match any column.";
482 }
483 throw std::runtime_error(text);
484 }
485 return selectedColumns;
486}
487
488/// Throw if column `definedColView` is already there.
489void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols,
490 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
491{
492 const std::string definedCol(definedColView); // convert to std::string
493
494 std::string error;
495 if (customCols.IsAlias(definedCol))
496 error = "An alias with that name, pointing to column \"" + customCols.ResolveAlias(definedCol) +
497 "\", already exists in this branch of the computation graph.";
498 else if (customCols.HasName(definedCol))
499 error = "A column with that name has already been Define'd. Use Redefine to force redefinition.";
500 // else, check if definedCol is in the list of tree branches. This is a bit better than interrogating the TTree
501 // directly because correct usage of GetBranch, FindBranch, GetLeaf and FindLeaf can be tricky; so let's assume we
502 // got it right when we collected the list of available branches.
503 else if (std::find(treeColumns.begin(), treeColumns.end(), definedCol) != treeColumns.end())
504 error =
505 "A branch with that name is already present in the input TTree/TChain. Use Redefine to force redefinition.";
506 else if (std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedCol) != dataSourceColumns.end())
507 error =
508 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
509
510 if (!error.empty()) {
511 error = "RDataFrame::" + where + ": cannot define column \"" + definedCol + "\". " + error;
512 throw std::runtime_error(error);
513 }
514}
515
516/// Throw if column `definedColView` is _not_ already there.
517void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols,
518 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
519{
520 const std::string definedCol(definedColView); // convert to std::string
521 std::string error;
522
523 if (customCols.IsAlias(definedCol)) {
524 error = "An alias with that name, pointing to column \"" + customCols.ResolveAlias(definedCol) +
525 "\", already exists. Aliases cannot be Redefined or Varied.";
526 }
527
528 if (error.empty()) {
529 const bool isAlreadyDefined = customCols.HasName(definedCol);
530 // check if definedCol is in the list of tree branches. This is a bit better than interrogating the TTree
531 // directly because correct usage of GetBranch, FindBranch, GetLeaf and FindLeaf can be tricky; so let's assume we
532 // got it right when we collected the list of available branches.
533 const bool isABranch = std::find(treeColumns.begin(), treeColumns.end(), definedCol) != treeColumns.end();
534 const bool isADSColumn =
535 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedCol) != dataSourceColumns.end();
536
537 if (!isAlreadyDefined && !isABranch && !isADSColumn)
538 error = "No column with that name was found in the dataset. Use Define to create a new column.";
539 }
540
541 if (!error.empty()) {
542 error = "RDataFrame::" + where + ": cannot redefine or vary column \"" + definedCol + "\". " + error;
543 throw std::runtime_error(error);
544 }
545}
546
547/// Throw if the column has systematic variations attached.
548void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
549{
550 const std::string definedCol(definedColView);
551 const auto &variationDeps = customCols.GetVariationDeps(definedCol);
552 if (!variationDeps.empty()) {
553 const std::string error =
554 "RDataFrame::" + where + ": cannot redefine column \"" + definedCol +
555 "\". The column depends on one or more systematic variations and re-defining varied columns is not supported.";
556 throw std::runtime_error(error);
557 }
558}
559
/// Throw a std::runtime_error if the number of template parameters differs from the number of column names.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
{
   if (nTemplateParams == nColumnNames)
      return;

   throw std::runtime_error("The number of template parameters specified is " + std::to_string(nTemplateParams) +
                            " while " + std::to_string(nColumnNames) + " columns have been specified.");
}
571
572/// Choose between local column names or default column names, throw in case of errors.
573const ColumnNames_t
574SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
575{
576 if (names.empty()) {
577 // use default column names
578 if (defaultNames.size() < nRequiredNames)
579 throw std::runtime_error(
580 std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
581 " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
582 // return first nRequiredNames default column names
583 return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames);
584 } else {
585 // use column names provided by the user to this particular transformation/action
586 if (names.size() != nRequiredNames) {
587 auto msg = std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
588 " required but " + std::to_string(names.size()) + (names.size() == 1 ? " was" : " were") +
589 " provided:";
590 for (const auto &name : names)
591 msg += " \"" + name + "\",";
592 msg.back() = '.';
593 throw std::runtime_error(msg);
594 }
595 return names;
596 }
597}
598
599ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns,
600 const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
601{
602 ColumnNames_t unknownColumns;
603 for (auto &column : requiredCols) {
604 const auto isBranch = std::find(datasetColumns.begin(), datasetColumns.end(), column) != datasetColumns.end();
605 if (isBranch)
606 continue;
607 if (definedCols.HasName(column))
608 continue;
609 const auto isDataSourceColumn =
610 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
611 if (isDataSourceColumn)
612 continue;
613 unknownColumns.emplace_back(column);
614 }
615 return unknownColumns;
616}
617
618std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager)
619{
620 return loopManager->GetFiltersNames();
621}
622
623ParsedTreePath ParseTreePath(std::string_view fullTreeName)
624{
625 // split name into directory and treename if needed
626 std::string_view dirName = "";
627 std::string_view treeName = fullTreeName;
628 const auto lastSlash = fullTreeName.rfind('/');
629 if (std::string_view::npos != lastSlash) {
630 dirName = treeName.substr(0, lastSlash);
631 treeName = treeName.substr(lastSlash + 1, treeName.size());
632 }
633 return {std::string(treeName), std::string(dirName)};
634}
635
/// Format the address `addr` as a hexadecimal number with base prefix.
/// The pointer is converted to an integer before printing so that the notation is the same on
/// every platform (Windows-friendly).
std::string PrettyPrintAddr(const void *const addr)
{
   const auto addrAsInteger = reinterpret_cast<size_t>(addr);
   std::stringstream formatted;
   formatted << std::hex << std::showbase << addrAsInteger;
   return formatted.str();
}
643
644/// Book the jitting of a Filter call
645std::shared_ptr<RDFDetail::RJittedFilter>
646BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> *prevNodeOnHeap, std::string_view name, std::string_view expression,
647 const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
648{
649 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
650
651 const auto parsedExpr = ParseRDFExpression(expression, branches, customCols, dsColumns);
652 const auto exprVarTypes =
653 GetValidatedArgTypes(parsedExpr.fUsedCols, customCols, tree, ds, "Filter", /*vector2rvec=*/true);
654 const auto lambdaName = DeclareLambda(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
655 const auto type = RetTypeOfLambda(lambdaName);
656 if (type != "bool")
657 std::runtime_error("Filter: the following expression does not evaluate to bool:\n" + std::string(expression));
658
659 // definesOnHeap is deleted by the jitted call to JitFilterHelper
661 const auto definesOnHeapAddr = PrettyPrintAddr(definesOnHeap);
662 const auto prevNodeAddr = PrettyPrintAddr(prevNodeOnHeap);
663
664 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
665 (*prevNodeOnHeap)->GetLoopManagerUnchecked(), name,
666 Union(customCols.GetVariationDeps(parsedExpr.fUsedCols), (*prevNodeOnHeap)->GetVariations()));
667
668 // Produce code snippet that creates the filter and registers it with the corresponding RJittedFilter
669 // Windows requires std::hex << std::showbase << (size_t)pointer to produce notation "0x1234"
670 std::stringstream filterInvocation;
671 filterInvocation << "ROOT::Internal::RDF::JitFilterHelper(" << lambdaName << ", new const char*["
672 << parsedExpr.fUsedCols.size() << "]{";
673 for (const auto &col : parsedExpr.fUsedCols)
674 filterInvocation << "\"" << col << "\", ";
675 if (!parsedExpr.fUsedCols.empty())
676 filterInvocation.seekp(-2, filterInvocation.cur); // remove the last ",
677 // lifetime of pointees:
678 // - jittedFilter: heap-allocated weak_ptr to the actual jittedFilter that will be deleted by JitFilterHelper
679 // - prevNodeOnHeap: heap-allocated shared_ptr to the actual previous node that will be deleted by JitFilterHelper
680 // - definesOnHeap: heap-allocated, will be deleted by JitFilterHelper
681 filterInvocation << "}, " << parsedExpr.fUsedCols.size() << ", \"" << name << "\", "
682 << "reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedFilter>*>("
683 << PrettyPrintAddr(MakeWeakOnHeap(jittedFilter)) << "), "
684 << "reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" << prevNodeAddr << "),"
685 << "reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesOnHeapAddr << ")"
686 << ");\n";
687
688 auto lm = jittedFilter->GetLoopManagerUnchecked();
689 lm->ToJitExec(filterInvocation.str());
690
691 return jittedFilter;
692}
693
694/// Book the jitting of a Define call
695std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
696 RDataSource *ds, const RColumnRegister &customCols,
697 const ColumnNames_t &branches,
698 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
699{
700 auto *const tree = lm.GetTree();
701 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
702
703 const auto parsedExpr = ParseRDFExpression(expression, branches, customCols, dsColumns);
704 const auto exprVarTypes =
705 GetValidatedArgTypes(parsedExpr.fUsedCols, customCols, tree, ds, "Define", /*vector2rvec=*/true);
706 const auto lambdaName = DeclareLambda(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
707 const auto type = RetTypeOfLambda(lambdaName);
708
709 auto definesCopy = new RColumnRegister(customCols);
710 auto definesAddr = PrettyPrintAddr(definesCopy);
711 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, type, lm, customCols, parsedExpr.fUsedCols);
712
713 std::stringstream defineInvocation;
714 defineInvocation << "ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>("
715 << lambdaName << ", new const char*[" << parsedExpr.fUsedCols.size() << "]{";
716 for (const auto &col : parsedExpr.fUsedCols) {
717 defineInvocation << "\"" << col << "\", ";
718 }
719 if (!parsedExpr.fUsedCols.empty())
720 defineInvocation.seekp(-2, defineInvocation.cur); // remove the last ",
721 // lifetime of pointees:
722 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
723 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
724 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
725 defineInvocation << "}, " << parsedExpr.fUsedCols.size() << ", \"" << name
726 << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
727 << "), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
728 << PrettyPrintAddr(MakeWeakOnHeap(jittedDefine))
729 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
730 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
731 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
732
733 lm.ToJitExec(defineInvocation.str());
734 return jittedDefine;
735}
736
737/// Book the jitting of a DefinePerSample call
738std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
739 RLoopManager &lm, const RColumnRegister &customCols,
740 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
741{
742 const auto lambdaName = DeclareLambda(std::string(expression), {"rdfslot_", "rdfsampleinfo_"},
743 {"unsigned int", "const ROOT::RDF::RSampleInfo"});
744 const auto retType = RetTypeOfLambda(lambdaName);
745
746 auto definesCopy = new RColumnRegister(customCols);
747 auto definesAddr = PrettyPrintAddr(definesCopy);
748 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, retType, lm, customCols, ColumnNames_t{});
749
750 std::stringstream defineInvocation;
751 defineInvocation << "ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
752 << lambdaName << ", nullptr, 0, ";
753 // lifetime of pointees:
754 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
755 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
756 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
757 defineInvocation << "\"" << name << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
758 << "), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
759 << PrettyPrintAddr(MakeWeakOnHeap(jittedDefine))
760 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
761 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
762 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
763
764 lm.ToJitExec(defineInvocation.str());
765 return jittedDefine;
766}
767
768/// Book the jitting of a Vary call
769std::shared_ptr<RJittedVariation>
770BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
771 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
772 RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches,
773 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
774{
775 auto *const tree = lm.GetTree();
776 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
777
778 const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
779 const auto exprVarTypes =
780 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, tree, ds, "Vary", /*vector2rvec=*/true);
781 const auto lambdaName = DeclareLambda(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
782 const auto type = RetTypeOfLambda(lambdaName);
783
784 if (type.rfind("ROOT::VecOps::RVec", 0) != 0)
785 throw std::runtime_error(
786 "Jitted Vary expressions must return an RVec object. The following expression returns a " + type +
787 " instead:\n" + parsedExpr.fExpr);
788
789 auto colRegisterCopy = new RColumnRegister(colRegister);
790 const auto colRegisterAddr = PrettyPrintAddr(colRegisterCopy);
791 auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags, type, colRegister,
792 lm, parsedExpr.fUsedCols);
793
794 // build invocation to JitVariationHelper
795 // arrays of strings are passed as const char** plus size.
796 // lifetime of pointees:
797 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
798 // - jittedVariation: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
799 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
800 std::stringstream varyInvocation;
801 varyInvocation << "ROOT::Internal::RDF::JitVariationHelper(" << lambdaName << ", new const char*["
802 << parsedExpr.fUsedCols.size() << "]{";
803 for (const auto &col : parsedExpr.fUsedCols) {
804 varyInvocation << "\"" << col << "\", ";
805 }
806 if (!parsedExpr.fUsedCols.empty())
807 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
808 varyInvocation << "}, " << parsedExpr.fUsedCols.size();
809 varyInvocation << ", new const char*[" << colNames.size() << "]{";
810 for (const auto &col : colNames) {
811 varyInvocation << "\"" << col << "\", ";
812 }
813 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
814 varyInvocation << "}, " << colNames.size() << ", new const char*[" << variationTags.size() << "]{";
815 for (const auto &tag : variationTags) {
816 varyInvocation << "\"" << tag << "\", ";
817 }
818 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
819 varyInvocation << "}, " << variationTags.size() << ", \"" << variationName
820 << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
821 << "), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedVariation>*>("
822 << PrettyPrintAddr(MakeWeakOnHeap(jittedVariation))
823 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << colRegisterAddr
824 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
825 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
826
827 lm.ToJitExec(varyInvocation.str());
828 return jittedVariation;
829}
830
831// Jit and call something equivalent to "this->BuildAndBook<ColTypes...>(params...)"
832// (see comments in the body for actual jitted code)
833std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr<RDFDetail::RNodeBase> *prevNode,
834 const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap,
835 TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds,
836 std::weak_ptr<RJittedAction> *jittedActionOnHeap)
837{
838 // retrieve type of action as a string
839 auto actionTypeClass = TClass::GetClass(at);
840 if (!actionTypeClass) {
841 std::string exceptionText = "An error occurred while inferring the action type of the operation.";
842 throw std::runtime_error(exceptionText);
843 }
844 const std::string actionTypeName = actionTypeClass->GetName();
845 const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(':') + 1);
846
847 // retrieve type of result of the action as a string
848 const auto helperArgTypeName = TypeID2TypeName(helperArgType);
849 if (helperArgTypeName.empty()) {
850 ThrowJitBuildActionHelperTypeError(actionTypeNameBase, helperArgType);
851 }
852
853 auto definesCopy = new RColumnRegister(customCols); // deleted in jitted CallBuildAction
854 auto definesAddr = PrettyPrintAddr(definesCopy);
855
856 // Build a call to CallBuildAction with the appropriate argument. When run through the interpreter, this code will
857 // just-in-time create an RAction object and it will assign it to its corresponding RJittedAction.
858 std::stringstream createAction_str;
859 createAction_str << "ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
860 const auto columnTypeNames =
861 GetValidatedArgTypes(cols, customCols, tree, ds, actionTypeNameBase, /*vector2rvec=*/true);
862 for (auto &colType : columnTypeNames)
863 createAction_str << ", " << colType;
864 // on Windows, to prefix the hexadecimal value of a pointer with '0x',
865 // one need to write: std::hex << std::showbase << (size_t)pointer
866 createAction_str << ">(reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
867 << PrettyPrintAddr(prevNode) << "), new const char*[" << cols.size() << "]{";
868 for (auto i = 0u; i < cols.size(); ++i) {
869 if (i != 0u)
870 createAction_str << ", ";
871 createAction_str << '"' << cols[i] << '"';
872 }
873 createAction_str << "}, " << cols.size() << ", " << nSlots << ", reinterpret_cast<shared_ptr<" << helperArgTypeName
874 << ">*>(" << PrettyPrintAddr(helperArgOnHeap)
875 << "), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedAction>*>("
876 << PrettyPrintAddr(jittedActionOnHeap)
877 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr << "));";
878 return createAction_str.str();
879}
880
/// Return true if at least one of the given strings is empty, false otherwise (including for an empty list).
bool AtLeastOneEmptyString(const std::vector<std::string_view> strings)
{
   return std::any_of(strings.begin(), strings.end(),
                      [](std::string_view candidate) { return candidate.empty(); });
}
889
890std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr)
891{
892 return ptr;
893}
894
895/// Given the desired number of columns and the user-provided list of columns:
896/// * fallback to using the first nColumns default columns if needed (or throw if nColumns > nDefaultColumns)
897/// * check that selected column names refer to valid branches, custom columns or datasource columns (throw if not)
898/// * replace column names from aliases by the actual column name
899/// Return the list of selected column names.
900ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
901 const RColumnRegister &customColumns, RDataSource *ds)
902{
903 auto selectedColumns = SelectColumns(nColumns, columns, lm.GetDefaultColumnNames());
904
905 for (auto &col : selectedColumns) {
906 col = customColumns.ResolveAlias(col);
907 }
908
909 // Complain if there are still unknown columns at this point
910 const auto unknownColumns = FindUnknownColumns(selectedColumns, lm.GetBranchNames(), customColumns,
911 ds ? ds->GetColumnNames() : ColumnNames_t{});
912
913 if (!unknownColumns.empty()) {
914 std::stringstream unknowns;
915 std::string delim = unknownColumns.size() > 1 ? "s: " : ": "; // singular/plural
916 for (auto &unknownColumn : unknownColumns) {
917 unknowns << delim << unknownColumn;
918 delim = ',';
919 }
920 throw std::runtime_error("Unknown column" + unknowns.str());
921 }
922
923 return selectedColumns;
924}
925
926std::vector<std::string> GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister,
927 TTree *tree, RDataSource *ds, const std::string &context,
928 bool vector2rvec)
929{
930 auto toCheckedArgType = [&](const std::string &c) {
931 RDFDetail::RDefineBase *define = colRegister.HasName(c) ? colRegister.GetColumns().at(c).get() : nullptr;
932 const auto colType = ColumnName2ColumnTypeName(c, tree, ds, define, vector2rvec);
933 if (colType.rfind("CLING_UNKNOWN_TYPE", 0) == 0) { // the interpreter does not know this type
934 const auto msg =
935 "The type of custom column \"" + c + "\" (" + colType.substr(19) +
936 ") is not known to the interpreter, but a just-in-time-compiled " + context +
937 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
938 throw std::runtime_error(msg);
939 }
940 return colType;
941 };
942 std::vector<std::string> colTypes;
943 colTypes.reserve(colNames.size());
944 std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
945 return colTypes;
946}
947
948/// Return a bitset each element of which indicates whether the corresponding element in `selectedColumns` is the
949/// name of a column that must be defined via datasource. All elements of the returned vector are false if no
950/// data-source is present.
951std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
952{
953 const auto nColumns = requestedCols.size();
954 std::vector<bool> mustBeDefined(nColumns, false);
955 for (auto i = 0u; i < nColumns; ++i)
956 mustBeDefined[i] = std::find(definedCols.begin(), definedCols.end(), requestedCols[i]) == definedCols.end();
957 return mustBeDefined;
958}
959
961{
962 std::unordered_set<std::string> uniqueCols;
963 for (auto &col : cols) {
964 if (!uniqueCols.insert(col).second) {
965 const auto msg = "Error: column \"" + col +
966 "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
967 "readable with RDataFrame.";
968 throw std::logic_error(msg);
969 }
970 }
971}
972
973////////////////////////////////////////////////////////////////////////////////
974/// \brief Trigger the execution of an RDataFrame computation graph.
975/// \param[in] node A node of the computation graph (not a result).
976///
977/// This function calls the RLoopManager::Run method on the \p fLoopManager data
978/// member of the input argument. It is intended for internal use only.
980 node.fLoopManager->Run();
981}
982
983} // namespace RDF
984} // namespace Internal
985} // namespace ROOT
#define c(i)
Definition: RSha256.hxx:101
#define R__ASSERT(e)
Definition: TError.h:118
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char cname
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char text
char name[80]
Definition: TGX11.cxx:110
R__EXTERN TVirtualMutex * gROOTMutex
Definition: TROOT.h:61
#define gROOT
Definition: TROOT.h:404
#define R__LOCKGUARD(mutex)
#define free
Definition: civetweb.c:1539
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
A binder for user-defined columns and aliases.
bool IsAlias(const std::string &name) const
Return true if the given column name is an existing alias.
const DefinesMap_t & GetColumns() const
Returns a map of pointers to the defined columns.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
std::string ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:104
RLoopManager * fLoopManager
Definition: RInterface.hxx:119
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition: TClass.cxx:2968
A TFriendElement TF describes a TTree object TF in a file.
virtual TTree * GetTree()
Return pointer to friend TTree.
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Definition: TPRegexp.h:78
Basic string class.
Definition: TString.h:136
A TTree represents a columnar dataset.
Definition: TTree.h:79
virtual TObjArray * GetListOfBranches()
Definition: TTree.h:484
virtual TList * GetListOfFriends() const
Definition: TTree.h:486
basic_string_view< char > string_view
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Vary call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:224
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: RDFUtils.cxx:99
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &customColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition: RDFUtils.cxx:419
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
Definition: Utils.hxx:274
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition: RDFUtils.cxx:365
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
Definition: RDFUtils.cxx:317
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &customCols, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
Throw if the column has systematic variations attached.
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
std::vector< std::string > ColumnNames_t
Definition: Utils.hxx:35
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
static constexpr double s
Definition: tree.py:1