Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFInterfaceUtils.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 02/2018
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#include <ROOT/RDataSource.hxx>
14#include <ROOT/RDF/RDisplay.hxx>
21#include <ROOT/RDF/Utils.hxx>
22#include <string_view>
23#include <TBranch.h>
24#include <TClass.h>
25#include <TClassEdit.h>
26#include <TDataType.h>
27#include <TError.h>
28#include <TLeaf.h>
29#include <TObjArray.h>
30#include <TPRegexp.h>
31#include <TROOT.h>
32#include <TString.h>
33#include <TTree.h>
34#include <TVirtualMutex.h>
35
// Temporarily silence GCC warnings triggered by the third-party lexertk header included below,
// which is very noisy to compile.
38#if defined(__GNUC__)
39#pragma GCC diagnostic push
40#pragma GCC diagnostic ignored "-Woverloaded-virtual"
41#pragma GCC diagnostic ignored "-Wshadow"
42#endif
43#include "lexertk.hpp"
44#if defined(__GNUC__)
45#pragma GCC diagnostic pop
46#endif
47
48#include <algorithm>
49#include <cassert>
50#include <cstdlib> // for size_t
51#include <iterator> // for back_insert_iterator
52#include <map>
53#include <memory>
54#include <set>
55#include <sstream>
56#include <stdexcept>
57#include <string>
58#include <type_traits> // for remove_reference<>::type
59#include <typeinfo>
60#include <unordered_map>
61#include <unordered_set>
62#include <utility> // for pair
63#include <vector>
64
// Forward declarations of RDataFrame node types.
// NOTE(review): RJittedAction is declared in ROOT::Detail::Internal::RDF, i.e. `Internal` is nested inside
// `Detail`, exactly as in the original nesting -- confirm this placement is intended.
namespace ROOT::Detail::RDF {
class RDefineBase;
} // namespace ROOT::Detail::RDF

namespace ROOT::Detail::Internal::RDF {
class RJittedAction;
} // namespace ROOT::Detail::Internal::RDF
78
79namespace {
82
83/// A string expression such as those passed to Filter and Define, digested to a standardized form
84struct ParsedExpression {
85 /// The string expression with the dummy variable names in fVarNames in place of the original column names
86 std::string fExpr;
87 /// The list of valid column names that were used in the original string expression.
88 /// Duplicates are removed and column aliases (created with Alias calls) are resolved.
89 ColumnNames_t fUsedCols;
90 /// The list of variable names used in fExpr, with same ordering and size as fUsedCols
91 ColumnNames_t fVarNames;
92};
93
94/// Look at expression `expr` and return a pair of (column names used, aliases used)
95std::pair<ColumnNames_t, ColumnNames_t>
96FindUsedColsAndAliases(const std::string &expr, const ColumnNames_t &treeBranchNames,
97 const ROOT::Internal::RDF::RColumnRegister &colRegister, const ColumnNames_t &dataSourceColNames)
98{
99 lexertk::generator tokens;
100 const auto tokensOk = tokens.process(expr);
101 if (!tokensOk) {
102 const auto msg = "Failed to tokenize expression:\n" + expr + "\n\nMake sure it is valid C++.";
103 throw std::runtime_error(msg);
104 }
105
106 std::unordered_set<std::string> usedCols;
107 std::unordered_set<std::string> usedAliases;
108
109 // iterate over tokens in expression and fill usedCols and usedAliases
110 const auto nTokens = tokens.size();
111 const auto kSymbol = lexertk::token::e_symbol;
112 for (auto i = 0u; i < nTokens; ++i) {
113 const auto &tok = tokens[i];
114 // lexertk classifies '&' as e_symbol for some reason
115 if (tok.type != kSymbol || tok.value == "&" || tok.value == "|") {
116 // token is not a potential variable name, skip it
117 continue;
118 }
119
120 ColumnNames_t potentialColNames({tok.value});
121
122 // if token is the start of a dot chain (a.b.c...), a.b, a.b.c etc. are also potential column names
123 auto dotChainKeepsGoing = [&](unsigned int _i) {
124 return _i + 2 <= nTokens && tokens[_i + 1].value == "." && tokens[_i + 2].type == kSymbol;
125 };
126 while (dotChainKeepsGoing(i)) {
127 potentialColNames.emplace_back(potentialColNames.back() + "." + tokens[i + 2].value);
128 i += 2; // consume the tokens we looked at
129 }
130
131 // in an expression such as `a.b`, if `a` is a column alias add it to `usedAliases` and
132 // replace the alias with the real column name in `potentialColNames`.
133 const auto maybeAnAlias = potentialColNames[0]; // intentionally a copy as we'll modify potentialColNames later
134 const auto &resolvedAlias = colRegister.ResolveAlias(maybeAnAlias);
135 if (resolvedAlias != maybeAnAlias) { // this is an alias
136 usedAliases.insert(maybeAnAlias);
137 for (auto &s : potentialColNames)
138 s.replace(0, maybeAnAlias.size(), resolvedAlias);
139 }
140
141 // find the longest potential column name that is an actual column name
142 // (potential columns are sorted by length, so we search from the end to find the longest)
143 auto isRDFColumn = [&](const std::string &col) {
144 if (colRegister.IsDefineOrAlias(col) || IsStrInVec(col, treeBranchNames) ||
145 IsStrInVec(col, dataSourceColNames))
146 return true;
147 return false;
148 };
149 const auto longestRDFColMatch = std::find_if(potentialColNames.crbegin(), potentialColNames.crend(), isRDFColumn);
150 if (longestRDFColMatch != potentialColNames.crend())
151 usedCols.insert(*longestRDFColMatch);
152 }
153
154 return {{usedCols.begin(), usedCols.end()}, {usedAliases.begin(), usedAliases.end()}};
155}
156
/// Return a copy of `s` in which every '.' is preceded by a backslash, i.e. '.' becomes '\.'.
/// The result can be embedded in a regular expression where the dots must match literally.
std::string EscapeDots(const std::string &s)
{
   std::string escaped;
   escaped.reserve(s.size());
   for (const char c : s) {
      if (c == '.')
         escaped += '\\';
      escaped += c;
   }
   return escaped;
}
165
166TString ResolveAliases(const TString &expr, const ColumnNames_t &usedAliases,
167 const ROOT::Internal::RDF::RColumnRegister &colRegister)
168{
169 TString out(expr);
170
171 for (const auto &alias : usedAliases) {
172 const auto &col = colRegister.ResolveAlias(alias);
173 TPRegexp replacer("\\b" + EscapeDots(alias) + "\\b");
174 replacer.Substitute(out, col.data(), "g");
175 }
176
177 return out;
178}
179
180ParsedExpression ParseRDFExpression(std::string_view expr, const ColumnNames_t &treeBranchNames,
181 const ROOT::Internal::RDF::RColumnRegister &colRegister,
182 const ColumnNames_t &dataSourceColNames)
183{
184 // transform `#var` into `R_rdf_sizeof_var`
185 TString preProcessedExpr(expr);
186 // match #varname at beginning of the sentence or after not-a-word, but exclude preprocessor directives like #ifdef
187 TPRegexp colSizeReplacer(
188 "(^|\\W)#(?!(ifdef|ifndef|if|else|elif|endif|pragma|define|undef|include|line))([a-zA-Z_][a-zA-Z0-9_]*)");
189 colSizeReplacer.Substitute(preProcessedExpr, "$1R_rdf_sizeof_$3", "g");
190
191 ColumnNames_t usedCols;
192 ColumnNames_t usedAliases;
193 std::tie(usedCols, usedAliases) =
194 FindUsedColsAndAliases(std::string(preProcessedExpr), treeBranchNames, colRegister, dataSourceColNames);
195
196 const auto exprNoAliases = ResolveAliases(preProcessedExpr, usedAliases, colRegister);
197
198 // when we are done, exprWithVars willl be the same as preProcessedExpr but column names will be substituted with
199 // the dummy variable names in varNames
200 TString exprWithVars(exprNoAliases);
201
202 ColumnNames_t varNames(usedCols.size());
203 for (auto i = 0u; i < varNames.size(); ++i)
204 varNames[i] = "var" + std::to_string(i);
205
206 // sort the vector usedColsAndAliases by decreasing length of its elements,
207 // so in case of friends we guarantee we never substitute a column name with another column containing it
208 // ex. without sorting when passing "x" and "fr.x", the replacer would output "var0" and "fr.var0",
209 // because it has already substituted "x", hence the "x" in "fr.x" would be recognized as "var0",
210 // whereas the desired behaviour is handling them as "var0" and "var1"
211 std::sort(usedCols.begin(), usedCols.end(),
212 [](const std::string &a, const std::string &b) { return a.size() > b.size(); });
213 for (const auto &col : usedCols) {
214 const auto varIdx = std::distance(usedCols.begin(), std::find(usedCols.begin(), usedCols.end(), col));
215 TPRegexp replacer("\\b" + EscapeDots(col) + "\\b");
216 replacer.Substitute(exprWithVars, varNames[varIdx], "g");
217 }
218
219 return ParsedExpression{std::string(std::move(exprWithVars)), std::move(usedCols), std::move(varNames)};
220}
221
/// Access the function-local static map of Filter/Define functions that have been jitted.
/// The map is used to check whether an expression was already jitted and, if so, to retrieve the name
/// of the corresponding function. Keys are the body of the expression, values are the name of the jitted
/// function, e.g. for
///     auto f1(){ return 42; }
/// the key is "(){ return 42; }" and the value is "f1".
std::unordered_map<std::string, std::string> &GetJittedExprs()
{
   using ExprToName_t = std::unordered_map<std::string, std::string>;
   static ExprToName_t sJittedExprs;
   return sJittedExprs;
}
233
234std::string
235BuildFunctionString(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
236{
237 assert(vars.size() == varTypes.size());
238
239 TPRegexp re(R"(\breturn\b)");
240 const bool hasReturnStmt = re.MatchB(expr);
241
242 static const std::vector<std::string> fundamentalTypes = {
243 "int",
244 "signed",
245 "signed int",
246 "Int_t",
247 "unsigned",
248 "unsigned int",
249 "UInt_t",
250 "double",
251 "Double_t",
252 "float",
253 "Float_t",
254 "char",
255 "Char_t",
256 "unsigned char",
257 "UChar_t",
258 "bool",
259 "Bool_t",
260 "short",
261 "short int",
262 "Short_t",
263 "long",
264 "long int",
265 "long long int",
266 "Long64_t",
267 "unsigned long",
268 "unsigned long int",
269 "ULong64_t",
270 "std::size_t",
271 "size_t",
272 "Ssiz_t"
273 };
274
275 std::stringstream ss;
276 ss << "(";
277 for (auto i = 0u; i < vars.size(); ++i) {
278 std::string fullType;
279 const auto &type = varTypes[i];
280 if (std::find(fundamentalTypes.begin(), fundamentalTypes.end(), type) != fundamentalTypes.end()) {
281 // pass it by const value to help detect common mistakes such as if(x = 3)
282 fullType = "const " + type + " ";
283 } else {
284 // We pass by reference to avoid expensive copies
285 // It can't be const reference in general, as users might want/need to call non-const methods on the values
286 fullType = type + "& ";
287 }
288 ss << fullType << vars[i] << ", ";
289 }
290 if (!vars.empty())
291 ss.seekp(-2, ss.cur);
292
293 if (hasReturnStmt)
294 ss << "){";
295 else
296 ss << "){return ";
297 ss << expr << "\n;}";
298
299 return ss.str();
300}
301
302/// Declare a function to the interpreter in namespace R_rdf, return the name of the jitted function.
303/// If the function is already in GetJittedExprs, return the name for the function that has already been jitted.
304std::string DeclareFunction(const std::string &expr, const ColumnNames_t &vars, const ColumnNames_t &varTypes)
305{
307
308 const auto funcCode = BuildFunctionString(expr, vars, varTypes);
309 auto &exprMap = GetJittedExprs();
310 const auto exprIt = exprMap.find(funcCode);
311 if (exprIt != exprMap.end()) {
312 // expression already there
313 const auto funcName = exprIt->second;
314 return funcName;
315 }
316
317 // new expression
318 const auto funcBaseName = "func" + std::to_string(exprMap.size());
319 const auto funcFullName = "R_rdf::" + funcBaseName;
320
321 const auto toDeclare = "namespace R_rdf {\nauto " + funcBaseName + funcCode + "\nusing " + funcBaseName +
322 "_ret_t = typename ROOT::TypeTraits::CallableTraits<decltype(" + funcBaseName +
323 ")>::ret_type;\n}";
325
326 // InterpreterDeclare could throw. If it doesn't, mark the function as already jitted
327 exprMap.insert({funcCode, funcFullName});
328
329 return funcFullName;
330}
331
332/// Each jitted function comes with a func_ret_t type alias for its return type.
333/// Resolve that alias and return the true type as string.
334std::string RetTypeOfFunc(const std::string &funcName)
335{
336 const auto dt = gROOT->GetType((funcName + "_ret_t").c_str());
337 R__ASSERT(dt != nullptr);
338 const auto type = dt->GetFullTypeName();
339 return type;
340}
341
342[[noreturn]] void
343ThrowJitBuildActionHelperTypeError(const std::string &actionTypeNameBase, const std::type_info &helperArgType)
344{
345 int err = 0;
346 const char *cname = TClassEdit::DemangleTypeIdName(helperArgType, err);
347 std::string actionHelperTypeName = cname;
348 delete[] cname;
349 if (err != 0)
350 actionHelperTypeName = helperArgType.name();
351
352 std::string exceptionText =
353 "RDataFrame::Jit: cannot just-in-time compile a \"" + actionTypeNameBase + "\" action using helper type \"" +
354 actionHelperTypeName +
355 "\". This typically happens in a custom `Fill` or `Book` invocation where the types of the input columns have "
356 "not been specified as template parameters and the ROOT interpreter has no knowledge of this type of action "
357 "helper. Please add template parameters for the types of the input columns to avoid jitting this action (i.e. "
358 "`df.Fill<float>(..., {\"x\"})`, where `float` is the type of `x`) or declare the action helper type to the "
359 "interpreter, e.g. via gInterpreter->Declare.";
360
361 throw std::runtime_error(exceptionText);
362}
363
364} // anonymous namespace
365
366namespace ROOT {
367namespace Internal {
368namespace RDF {
369
370/// Take a list of column names, return that list with entries starting by '#' filtered out.
371/// The function throws when filtering out a column this way.
372ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
373{
374 ColumnNames_t columnListWithoutSizeColumns;
375 ColumnNames_t filteredColumns;
376 std::copy_if(columnNames.begin(), columnNames.end(), std::back_inserter(columnListWithoutSizeColumns),
377 [&](const std::string &name) {
378 if (name[0] == '#') {
379 filteredColumns.emplace_back(name);
380 return false;
381 } else {
382 return true;
383 }
384 });
385
386 if (!filteredColumns.empty()) {
387 std::string msg = "Column name(s) {";
388 for (auto &c : filteredColumns)
389 msg += c + ", ";
390 msg[msg.size() - 2] = '}';
391 msg += "will be ignored. Please go through a valid Alias to " + action + " an array size column";
392 throw std::runtime_error(msg);
393 }
394
395 return columnListWithoutSizeColumns;
396}
397
/// Resolve `col` against `aliasMap`.
/// \return The mapped name if `col` is a key of `aliasMap`; otherwise "R_rdf_sizeof_<var>" if `col` has
///         the form "#<var>" with a non-empty <var>; otherwise `col` itself.
std::string ResolveAlias(const std::string &col, const std::map<std::string, std::string> &aliasMap)
{
   const auto found = aliasMap.find(col);
   const bool isRegisteredAlias = found != aliasMap.end();
   if (isRegisteredAlias)
      return found->second;

   // #var is an alias for R_rdf_sizeof_var
   const bool isSizeColumn = col.size() > 1 && col[0] == '#';
   if (isSizeColumn)
      return "R_rdf_sizeof_" + col.substr(1);

   return col;
}
410
/// Throw if `var` is not usable as a C++ variable name.
///
/// A name is accepted if it is not empty, starts with an ASCII letter or an underscore and contains only
/// ASCII letters, digits and underscores.
/// \param var The candidate name.
/// \param where Name of the calling operation, used in the error message. The message mentions a
///              "column" when `where` is "Define" and a "variation" otherwise.
/// \throws std::runtime_error if the name is rejected.
void CheckValidCppVarName(std::string_view var, const std::string &where)
{
   auto isALetter = [](char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); };
   auto isANumber = [](char c) { return c >= '0' && c <= '9'; };
   auto isValidTok = [&isALetter, &isANumber](char c) { return c == '_' || isALetter(c) || isANumber(c); };

   // the emptiness check comes first: the first character is only read if there is one
   const bool isValid = !var.empty() && (var.front() == '_' || isALetter(var.front())) &&
                        std::all_of(var.begin(), var.end(), isValidTok);
   if (isValid)
      return;

   const auto objName = where == "Define" ? "column" : "variation";
   const auto error = "RDataFrame::" + where + ": cannot define " + objName + " \"" + std::string(var) +
                      "\". Not a valid C++ variable name.";
   throw std::runtime_error(error);
}
439
440std::string DemangleTypeIdName(const std::type_info &typeInfo)
441{
442 int dummy(0);
443 char *tn = TClassEdit::DemangleTypeIdName(typeInfo, dummy);
444 std::string tname(tn);
445 free(tn);
446 return tname;
447}
448
449ColumnNames_t
450ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
451{
452 const auto theRegexSize = columnNameRegexp.size();
453 std::string theRegex(columnNameRegexp);
454
455 const auto isEmptyRegex = 0 == theRegexSize;
456 // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
457 if (theRegexSize > 0 && theRegex[0] != '^')
458 theRegex = "^" + theRegex;
459 if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
460 theRegex = theRegex + "$";
461
462 ColumnNames_t selectedColumns;
463
464 // Since we support gcc48 and it does not provide in its stl std::regex,
465 // we need to use TPRegexp
466 TPRegexp regexp(theRegex);
467 for (auto &&colName : colNames) {
468 if ((isEmptyRegex || regexp.MatchB(colName.c_str())) && !IsInternalColumn(colName)) {
469 selectedColumns.emplace_back(colName);
470 }
471 }
472
473 if (selectedColumns.empty()) {
474 std::string text(callerName);
475 if (columnNameRegexp.empty()) {
476 text = ": there is no column available to match.";
477 } else {
478 text = ": regex \"" + std::string(columnNameRegexp) + "\" did not match any column.";
479 }
480 throw std::runtime_error(text);
481 }
482 return selectedColumns;
483}
484
485/// Throw if column `definedColView` is already there.
486void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
487 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
488{
489
490 std::string error{};
491 if (colRegister.IsAlias(definedColView))
492 error = "An alias with that name, pointing to column \"" + std::string(colRegister.ResolveAlias(definedColView)) +
493 "\", already exists in this branch of the computation graph.";
494 else if (colRegister.IsDefineOrAlias(definedColView))
495 error = "A column with that name has already been Define'd. Use Redefine to force redefinition.";
496 // else, check if definedColView is in the list of tree branches. This is a bit better than interrogating the TTree
497 // directly because correct usage of GetBranch, FindBranch, GetLeaf and FindLeaf can be tricky; so let's assume we
498 // got it right when we collected the list of available branches.
499 else if (std::find(treeColumns.begin(), treeColumns.end(), definedColView) != treeColumns.end())
500 error =
501 "A branch with that name is already present in the input TTree/TChain. Use Redefine to force redefinition.";
502 else if (std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedColView) != dataSourceColumns.end())
503 error =
504 "A column with that name is already present in the input data source. Use Redefine to force redefinition.";
505
506 if (!error.empty()) {
507 error = "RDataFrame::" + where + ": cannot define column \"" + std::string(definedColView) + "\". " + error;
508 throw std::runtime_error(error);
509 }
510}
511
512/// Throw if column `definedColView` is _not_ already there.
513void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister,
514 const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
515{
516 std::string error{};
517
518 if (colRegister.IsAlias(definedColView)) {
519 error = "An alias with that name, pointing to column \"" + std::string(colRegister.ResolveAlias(definedColView)) +
520 "\", already exists. Aliases cannot be Redefined or Varied.";
521 }
522
523 if (error.empty()) {
524 const bool isAlreadyDefined = colRegister.IsDefineOrAlias(definedColView);
525 // check if definedCol is in the list of tree branches. This is a bit better than interrogating the TTree
526 // directly because correct usage of GetBranch, FindBranch, GetLeaf and FindLeaf can be tricky; so let's assume we
527 // got it right when we collected the list of available branches.
528 const bool isABranch = std::find(treeColumns.begin(), treeColumns.end(), definedColView) != treeColumns.end();
529 const bool isADSColumn =
530 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), definedColView) != dataSourceColumns.end();
531
532 if (!isAlreadyDefined && !isABranch && !isADSColumn)
533 error = "No column with that name was found in the dataset. Use Define to create a new column.";
534 }
535
536 if (!error.empty()) {
537 error =
538 "RDataFrame::" + where + ": cannot redefine or vary column \"" + std::string(definedColView) + "\". " + error;
539 throw std::runtime_error(error);
540 }
541}
542
543/// Throw if the column has systematic variations attached.
544void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
545{
546 const std::string definedCol(definedColView);
547 const auto &variationDeps = colRegister.GetVariationDeps(definedCol);
548 if (!variationDeps.empty()) {
549 const std::string error =
550 "RDataFrame::" + where + ": cannot redefine column \"" + definedCol +
551 "\". The column depends on one or more systematic variations and re-defining varied columns is not supported.";
552 throw std::runtime_error(error);
553 }
554}
555
/// Throw a std::runtime_error if the number of template parameters differs from the number of column names.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
{
   if (nTemplateParams == nColumnNames)
      return;

   throw std::runtime_error("The number of template parameters specified is " + std::to_string(nTemplateParams) +
                            " while " + std::to_string(nColumnNames) + " columns have been specified.");
}
567
568/// Choose between local column names or default column names, throw in case of errors.
569const ColumnNames_t
570SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
571{
572 if (names.empty()) {
573 // use default column names
574 if (defaultNames.size() < nRequiredNames)
575 throw std::runtime_error(
576 std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
577 " required but none were provided and the default list has size " + std::to_string(defaultNames.size()));
578 // return first nRequiredNames default column names
579 return ColumnNames_t(defaultNames.begin(), defaultNames.begin() + nRequiredNames);
580 } else {
581 // use column names provided by the user to this particular transformation/action
582 if (names.size() != nRequiredNames) {
583 auto msg = std::to_string(nRequiredNames) + " column name" + (nRequiredNames == 1 ? " is" : "s are") +
584 " required but " + std::to_string(names.size()) + (names.size() == 1 ? " was" : " were") +
585 " provided:";
586 for (const auto &name : names)
587 msg += " \"" + name + "\",";
588 msg.back() = '.';
589 throw std::runtime_error(msg);
590 }
591 return names;
592 }
593}
594
595ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns,
596 const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
597{
598 ColumnNames_t unknownColumns;
599 for (auto &column : requiredCols) {
600 const auto isBranch = std::find(datasetColumns.begin(), datasetColumns.end(), column) != datasetColumns.end();
601 if (isBranch)
602 continue;
603 if (definedCols.IsDefineOrAlias(column))
604 continue;
605 const auto isDataSourceColumn =
606 std::find(dataSourceColumns.begin(), dataSourceColumns.end(), column) != dataSourceColumns.end();
607 if (isDataSourceColumn)
608 continue;
609 unknownColumns.emplace_back(column);
610 }
611 return unknownColumns;
612}
613
614std::vector<std::string> GetFilterNames(const std::shared_ptr<RLoopManager> &loopManager)
615{
616 return loopManager->GetFiltersNames();
617}
618
619ParsedTreePath ParseTreePath(std::string_view fullTreeName)
620{
621 // split name into directory and treename if needed
622 std::string_view dirName = "";
623 std::string_view treeName = fullTreeName;
624 const auto lastSlash = fullTreeName.rfind('/');
625 if (std::string_view::npos != lastSlash) {
626 dirName = treeName.substr(0, lastSlash);
627 treeName = treeName.substr(lastSlash + 1, treeName.size());
628 }
629 return {std::string(treeName), std::string(dirName)};
630}
631
/// Format the address `addr` as a hexadecimal integer with base prefix.
/// The pointer is converted to an integer before streaming, which is Windows-friendly.
std::string PrettyPrintAddr(const void *const addr)
{
   std::ostringstream formatted;
   formatted.setf(std::ios_base::hex, std::ios_base::basefield);
   formatted.setf(std::ios_base::showbase);
   formatted << reinterpret_cast<size_t>(addr);
   return formatted.str();
}
639
640/// Book the jitting of a Filter call
641std::shared_ptr<RDFDetail::RJittedFilter>
642BookFilterJit(std::shared_ptr<RDFDetail::RNodeBase> *prevNodeOnHeap, std::string_view name, std::string_view expression,
643 const ColumnNames_t &branches, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
644{
645 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
646
647 const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
648 const auto exprVarTypes =
649 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, tree, ds, "Filter", /*vector2rvec=*/true);
650 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
651 const auto type = RetTypeOfFunc(funcName);
652 if (type != "bool")
653 std::runtime_error("Filter: the following expression does not evaluate to bool:\n" + std::string(expression));
654
655 // definesOnHeap is deleted by the jitted call to JitFilterHelper
657 const auto definesOnHeapAddr = PrettyPrintAddr(definesOnHeap);
658 const auto prevNodeAddr = PrettyPrintAddr(prevNodeOnHeap);
659
660 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(
661 (*prevNodeOnHeap)->GetLoopManagerUnchecked(), name,
662 Union(colRegister.GetVariationDeps(parsedExpr.fUsedCols), (*prevNodeOnHeap)->GetVariations()));
663
664 // Produce code snippet that creates the filter and registers it with the corresponding RJittedFilter
665 // Windows requires std::hex << std::showbase << (size_t)pointer to produce notation "0x1234"
666 std::stringstream filterInvocation;
667 filterInvocation << "ROOT::Internal::RDF::JitFilterHelper(" << funcName << ", new const char*["
668 << parsedExpr.fUsedCols.size() << "]{";
669 for (const auto &col : parsedExpr.fUsedCols)
670 filterInvocation << "\"" << col << "\", ";
671 if (!parsedExpr.fUsedCols.empty())
672 filterInvocation.seekp(-2, filterInvocation.cur); // remove the last ",
673 // lifetime of pointees:
674 // - jittedFilter: heap-allocated weak_ptr to the actual jittedFilter that will be deleted by JitFilterHelper
675 // - prevNodeOnHeap: heap-allocated shared_ptr to the actual previous node that will be deleted by JitFilterHelper
676 // - definesOnHeap: heap-allocated, will be deleted by JitFilterHelper
677 filterInvocation << "}, " << parsedExpr.fUsedCols.size() << ", \"" << name << "\", "
678 << "reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedFilter>*>("
679 << PrettyPrintAddr(MakeWeakOnHeap(jittedFilter)) << "), "
680 << "reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>(" << prevNodeAddr << "),"
681 << "reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesOnHeapAddr << ")"
682 << ");\n";
683
684 auto lm = jittedFilter->GetLoopManagerUnchecked();
685 lm->ToJitExec(filterInvocation.str());
686
687 return jittedFilter;
688}
689
690/// Book the jitting of a Define call
691std::shared_ptr<RJittedDefine> BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm,
692 RDataSource *ds, const RColumnRegister &colRegister,
693 const ColumnNames_t &branches,
694 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
695{
696 auto *const tree = lm.GetTree();
697 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
698
699 const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
700 const auto exprVarTypes =
701 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, tree, ds, "Define", /*vector2rvec=*/true);
702 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
703 const auto type = RetTypeOfFunc(funcName);
704
705 auto definesCopy = new RColumnRegister(colRegister);
706 auto definesAddr = PrettyPrintAddr(definesCopy);
707 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, type, lm, colRegister, parsedExpr.fUsedCols);
708
709 std::stringstream defineInvocation;
710 defineInvocation << "ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefineTag>(" << funcName
711 << ", new const char*[" << parsedExpr.fUsedCols.size() << "]{";
712 for (const auto &col : parsedExpr.fUsedCols) {
713 defineInvocation << "\"" << col << "\", ";
714 }
715 if (!parsedExpr.fUsedCols.empty())
716 defineInvocation.seekp(-2, defineInvocation.cur); // remove the last ",
717 // lifetime of pointees:
718 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
719 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
720 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
721 defineInvocation << "}, " << parsedExpr.fUsedCols.size() << ", \"" << name
722 << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
723 << "), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
724 << PrettyPrintAddr(MakeWeakOnHeap(jittedDefine))
725 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
726 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
727 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
728
729 lm.ToJitExec(defineInvocation.str());
730 return jittedDefine;
731}
732
733/// Book the jitting of a DefinePerSample call
734std::shared_ptr<RJittedDefine> BookDefinePerSampleJit(std::string_view name, std::string_view expression,
735 RLoopManager &lm, const RColumnRegister &colRegister,
736 std::shared_ptr<RNodeBase> *upcastNodeOnHeap)
737{
738 const auto funcName = DeclareFunction(std::string(expression), {"rdfslot_", "rdfsampleinfo_"},
739 {"unsigned int", "const ROOT::RDF::RSampleInfo"});
740 const auto retType = RetTypeOfFunc(funcName);
741
742 auto definesCopy = new RColumnRegister(colRegister);
743 auto definesAddr = PrettyPrintAddr(definesCopy);
744 auto jittedDefine = std::make_shared<RDFDetail::RJittedDefine>(name, retType, lm, colRegister, ColumnNames_t{});
745
746 std::stringstream defineInvocation;
747 defineInvocation << "ROOT::Internal::RDF::JitDefineHelper<ROOT::Internal::RDF::DefineTypes::RDefinePerSampleTag>("
748 << funcName << ", nullptr, 0, ";
749 // lifetime of pointees:
750 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
751 // - jittedDefine: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
752 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
753 defineInvocation << "\"" << name << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
754 << "), reinterpret_cast<std::weak_ptr<ROOT::Detail::RDF::RJittedDefine>*>("
755 << PrettyPrintAddr(MakeWeakOnHeap(jittedDefine))
756 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr
757 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
758 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
759
760 lm.ToJitExec(defineInvocation.str());
761 return jittedDefine;
762}
763
764/// Book the jitting of a Vary call
765std::shared_ptr<RJittedVariation>
766BookVariationJit(const std::vector<std::string> &colNames, std::string_view variationName,
767 const std::vector<std::string> &variationTags, std::string_view expression, RLoopManager &lm,
768 RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches,
769 std::shared_ptr<RNodeBase> *upcastNodeOnHeap, bool isSingleColumn)
770{
771 auto *const tree = lm.GetTree();
772 const auto &dsColumns = ds ? ds->GetColumnNames() : ColumnNames_t{};
773
774 const auto parsedExpr = ParseRDFExpression(expression, branches, colRegister, dsColumns);
775 const auto exprVarTypes =
776 GetValidatedArgTypes(parsedExpr.fUsedCols, colRegister, tree, ds, "Vary", /*vector2rvec=*/true);
777 const auto funcName = DeclareFunction(parsedExpr.fExpr, parsedExpr.fVarNames, exprVarTypes);
778 const auto type = RetTypeOfFunc(funcName);
779
780 if (type.rfind("ROOT::VecOps::RVec", 0) != 0) {
781 // Avoid leak
782 delete upcastNodeOnHeap;
783 upcastNodeOnHeap = nullptr;
784 throw std::runtime_error(
785 "Jitted Vary expressions must return an RVec object. The following expression returns a " + type +
786 " instead:\n" + parsedExpr.fExpr);
787 }
788
789 auto colRegisterCopy = new RColumnRegister(colRegister);
790 const auto colRegisterAddr = PrettyPrintAddr(colRegisterCopy);
791 auto jittedVariation = std::make_shared<RJittedVariation>(colNames, variationName, variationTags, type, colRegister,
792 lm, parsedExpr.fUsedCols);
793
794 // build invocation to JitVariationHelper
795 // arrays of strings are passed as const char** plus size.
796 // lifetime of pointees:
797 // - lm is the loop manager, and if that goes out of scope jitting does not happen at all (i.e. will always be valid)
798 // - jittedVariation: heap-allocated weak_ptr that will be deleted by JitDefineHelper after usage
799 // - definesAddr: heap-allocated, will be deleted by JitDefineHelper after usage
800 std::stringstream varyInvocation;
801 varyInvocation << "ROOT::Internal::RDF::JitVariationHelper<" << (isSingleColumn ? "true" : "false") << ">("
802 << funcName << ", new const char*[" << parsedExpr.fUsedCols.size() << "]{";
803 for (const auto &col : parsedExpr.fUsedCols) {
804 varyInvocation << "\"" << col << "\", ";
805 }
806 if (!parsedExpr.fUsedCols.empty())
807 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
808 varyInvocation << "}, " << parsedExpr.fUsedCols.size();
809 varyInvocation << ", new const char*[" << colNames.size() << "]{";
810 for (const auto &col : colNames) {
811 varyInvocation << "\"" << col << "\", ";
812 }
813 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
814 varyInvocation << "}, " << colNames.size() << ", new const char*[" << variationTags.size() << "]{";
815 for (const auto &tag : variationTags) {
816 varyInvocation << "\"" << tag << "\", ";
817 }
818 varyInvocation.seekp(-2, varyInvocation.cur); // remove the last ", "
819 varyInvocation << "}, " << variationTags.size() << ", \"" << variationName
820 << "\", reinterpret_cast<ROOT::Detail::RDF::RLoopManager*>(" << PrettyPrintAddr(&lm)
821 << "), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedVariation>*>("
822 << PrettyPrintAddr(MakeWeakOnHeap(jittedVariation))
823 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << colRegisterAddr
824 << "), reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
825 << PrettyPrintAddr(upcastNodeOnHeap) << "));\n";
826
827 lm.ToJitExec(varyInvocation.str());
828 return jittedVariation;
829}
830
831// Jit and call something equivalent to "this->BuildAndBook<ColTypes...>(params...)"
832// (see comments in the body for actual jitted code)
833std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr<RDFDetail::RNodeBase> *prevNode,
834 const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap,
835 TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds,
836 std::weak_ptr<RJittedAction> *jittedActionOnHeap)
837{
838 // retrieve type of action as a string
839 auto actionTypeClass = TClass::GetClass(at);
840 if (!actionTypeClass) {
841 std::string exceptionText = "An error occurred while inferring the action type of the operation.";
842 throw std::runtime_error(exceptionText);
843 }
844 const std::string actionTypeName = actionTypeClass->GetName();
845 const std::string actionTypeNameBase = actionTypeName.substr(actionTypeName.rfind(':') + 1);
846
847 // retrieve type of result of the action as a string
848 const auto helperArgTypeName = TypeID2TypeName(helperArgType);
849 if (helperArgTypeName.empty()) {
850 ThrowJitBuildActionHelperTypeError(actionTypeNameBase, helperArgType);
851 }
852
853 auto definesCopy = new RColumnRegister(colRegister); // deleted in jitted CallBuildAction
854 auto definesAddr = PrettyPrintAddr(definesCopy);
855
856 // Build a call to CallBuildAction with the appropriate argument. When run through the interpreter, this code will
857 // just-in-time create an RAction object and it will assign it to its corresponding RJittedAction.
858 std::stringstream createAction_str;
859 createAction_str << "ROOT::Internal::RDF::CallBuildAction<" << actionTypeName;
860 const auto columnTypeNames =
861 GetValidatedArgTypes(cols, colRegister, tree, ds, actionTypeNameBase, /*vector2rvec=*/true);
862 for (auto &colType : columnTypeNames)
863 createAction_str << ", " << colType;
864 // on Windows, to prefix the hexadecimal value of a pointer with '0x',
865 // one need to write: std::hex << std::showbase << (size_t)pointer
866 createAction_str << ">(reinterpret_cast<std::shared_ptr<ROOT::Detail::RDF::RNodeBase>*>("
867 << PrettyPrintAddr(prevNode) << "), new const char*[" << cols.size() << "]{";
868 for (auto i = 0u; i < cols.size(); ++i) {
869 if (i != 0u)
870 createAction_str << ", ";
871 createAction_str << '"' << cols[i] << '"';
872 }
873 createAction_str << "}, " << cols.size() << ", " << nSlots << ", reinterpret_cast<shared_ptr<" << helperArgTypeName
874 << ">*>(" << PrettyPrintAddr(helperArgOnHeap)
875 << "), reinterpret_cast<std::weak_ptr<ROOT::Internal::RDF::RJittedAction>*>("
876 << PrettyPrintAddr(jittedActionOnHeap)
877 << "), reinterpret_cast<ROOT::Internal::RDF::RColumnRegister*>(" << definesAddr << "));";
878 return createAction_str.str();
879}
880
/// Return true if at least one of the given strings is empty, false otherwise (including for an empty list).
bool AtLeastOneEmptyString(const std::vector<std::string_view> strings)
{
   return std::any_of(strings.begin(), strings.end(), [](std::string_view candidate) { return candidate.empty(); });
}
889
890std::shared_ptr<RNodeBase> UpcastNode(std::shared_ptr<RNodeBase> ptr)
891{
892 return ptr;
893}
894
895/// Given the desired number of columns and the user-provided list of columns:
896/// * fallback to using the first nColumns default columns if needed (or throw if nColumns > nDefaultColumns)
897/// * check that selected column names refer to valid branches, custom columns or datasource columns (throw if not)
898/// * replace column names from aliases by the actual column name
899/// Return the list of selected column names.
900ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns,
901 const RColumnRegister &colRegister, RDataSource *ds)
902{
903 auto selectedColumns = SelectColumns(nColumns, columns, lm.GetDefaultColumnNames());
904
905 for (auto &col : selectedColumns) {
906 col = colRegister.ResolveAlias(col);
907 }
908
909 // Complain if there are still unknown columns at this point
910 const auto unknownColumns = FindUnknownColumns(selectedColumns, lm.GetBranchNames(), colRegister,
911 ds ? ds->GetColumnNames() : ColumnNames_t{});
912
913 if (!unknownColumns.empty()) {
914 using namespace std::string_literals;
915 std::string errMsg = "Unknown column"s + (unknownColumns.size() > 1 ? "s: " : ": ");
916 for (auto &unknownColumn : unknownColumns)
917 errMsg += '"' + unknownColumn + "\", ";
918 errMsg.resize(errMsg.size() - 2); // remove last ", "
919 throw std::runtime_error(errMsg);
920 }
921
922 return selectedColumns;
923}
924
925std::vector<std::string> GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister,
926 TTree *tree, RDataSource *ds, const std::string &context,
927 bool vector2rvec)
928{
929 auto toCheckedArgType = [&](const std::string &c) {
930 RDFDetail::RDefineBase *define = colRegister.GetDefine(c);
931 const auto colType = ColumnName2ColumnTypeName(c, tree, ds, define, vector2rvec);
932 if (colType.rfind("CLING_UNKNOWN_TYPE", 0) == 0) { // the interpreter does not know this type
933 const auto msg =
934 "The type of custom column \"" + c + "\" (" + colType.substr(19) +
935 ") is not known to the interpreter, but a just-in-time-compiled " + context +
936 " call requires this column. Make sure to create and load ROOT dictionaries for this column's class.";
937 throw std::runtime_error(msg);
938 }
939 return colType;
940 };
941 std::vector<std::string> colTypes;
942 colTypes.reserve(colNames.size());
943 std::transform(colNames.begin(), colNames.end(), std::back_inserter(colTypes), toCheckedArgType);
944 return colTypes;
945}
946
947/// Return a bitset each element of which indicates whether the corresponding element in `selectedColumns` is the
948/// name of a column that must be defined via datasource. All elements of the returned vector are false if no
949/// data-source is present.
950std::vector<bool> FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
951{
952 const auto nColumns = requestedCols.size();
953 std::vector<bool> mustBeDefined(nColumns, false);
954 for (auto i = 0u; i < nColumns; ++i)
955 mustBeDefined[i] = std::find(definedCols.begin(), definedCols.end(), requestedCols[i]) == definedCols.end();
956 return mustBeDefined;
957}
958
960{
961 std::unordered_set<std::string> uniqueCols;
962 for (auto &col : cols) {
963 if (!uniqueCols.insert(col).second) {
964 const auto msg = "Error: column \"" + col +
965 "\" was passed to Snapshot twice. This is not supported: only one of the columns would be "
966 "readable with RDataFrame.";
967 throw std::logic_error(msg);
968 }
969 }
970}
971
972/// Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array branches added
973/// in the right positions (i.e. before the array branches that need them).
974std::pair<std::vector<std::string>, std::vector<std::string>>
975AddSizeBranches(const std::vector<std::string> &branches, TTree *tree, std::vector<std::string> &&colsWithoutAliases,
976 std::vector<std::string> &&colsWithAliases)
977{
978 if (!tree) // nothing to do
979 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
980
981 assert(colsWithoutAliases.size() == colsWithAliases.size());
982
983 auto nCols = colsWithoutAliases.size();
984 // Use index-iteration as we modify the vector during the iteration.
985 for (std::size_t i = 0u; i < nCols; ++i) {
986 const auto &colName = colsWithoutAliases[i];
987 if (!IsStrInVec(colName, branches))
988 continue; // this column is not a TTree branch, nothing to do
989
990 auto *b = tree->GetBranch(colName.c_str());
991 if (!b) // try harder
992 b = tree->FindBranch(colName.c_str());
993 assert(b != nullptr);
994 auto *leaves = b->GetListOfLeaves();
995 if (b->IsA() != TBranch::Class() || leaves->GetEntries() != 1)
996 continue; // this branch is not a variable-sized array, nothing to do
997
998 TLeaf *countLeaf = static_cast<TLeaf *>(leaves->At(0))->GetLeafCount();
999 if (!countLeaf || IsStrInVec(countLeaf->GetName(), colsWithoutAliases))
1000 continue; // not a variable-sized array or the size branch is already there, nothing to do
1001
1002 // otherwise we must insert the size in colsWithoutAliases _and_ colsWithAliases
1003 colsWithoutAliases.insert(colsWithoutAliases.begin() + i, countLeaf->GetName());
1004 colsWithAliases.insert(colsWithAliases.begin() + i, countLeaf->GetName());
1005 ++nCols;
1006 ++i; // as we inserted an element in the vector we iterate over, we need to move the index forward one extra time
1007 }
1008
1009 return {std::move(colsWithoutAliases), std::move(colsWithAliases)};
1010}
1011
1013{
1014 std::set<std::string> uniqueCols;
1015 columnNames.erase(
1016 std::remove_if(columnNames.begin(), columnNames.end(),
1017 [&uniqueCols](const std::string &colName) { return !uniqueCols.insert(colName).second; }),
1018 columnNames.end());
1019}
1020
1021} // namespace RDF
1022} // namespace Internal
1023} // namespace ROOT
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char cname
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char text
char name[80]
Definition TGX11.cxx:110
R__EXTERN TVirtualMutex * gROOTMutex
Definition TROOT.h:63
#define gROOT
Definition TROOT.h:407
#define R__LOCKGUARD(mutex)
#define free
Definition civetweb.c:1539
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
const ColumnNames_t & GetDefaultColumnNames() const
Return the list of default columns – empty if none was provided when constructing the RDataFrame.
A binder for user-defined columns, variations and aliases.
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
bool IsAlias(std::string_view name) const
Return true if the given column name is an existing alias.
std::string_view ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
std::vector< std::string > GetVariationDeps(const std::string &column) const
Get the names of all variations that directly or indirectly affect a given column.
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
static TClass * Class()
static TClass * GetClass(const char *name, Bool_t load=kTRUE, Bool_t silent=kFALSE)
Static method returning pointer to TClass of the specified class name.
Definition TClass.cxx:2968
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
Bool_t MatchB(const TString &s, const TString &mods="", Int_t start=0, Int_t nMaxMatch=10)
Definition TPRegexp.h:78
Basic string class.
Definition TString.h:139
A TTree represents a columnar dataset.
Definition TTree.h:79
const ColumnNames_t SelectColumns(unsigned int nRequiredNames, const ColumnNames_t &names, const ColumnNames_t &defaultNames)
Choose between local column names or default column names, throw in case of errors.
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:222
void RemoveDuplicates(ColumnNames_t &columnNames)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &colRegister, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition RDFUtils.cxx:99
bool IsStrInVec(const std::string &str, const std::vector< std::string > &vec)
Definition RDFUtils.cxx:424
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
std::vector< T > Union(const std::vector< T > &v1, const std::vector< T > &v2)
Return a vector with all elements of v1 and v2 and duplicates removed.
Definition Utils.hxx:268
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &colRegister, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition RDFUtils.cxx:363
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
void InterpreterDeclare(const std::string &code)
Declare code in the interpreter via the TInterpreter::Declare method, throw in case of errors.
Definition RDFUtils.cxx:315
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::vector< bool > FindUndefinedDSColumns(const ColumnNames_t &requestedCols, const ColumnNames_t &definedCols)
Return a bitset each element of which indicates whether the corresponding element in selectedColumns ...
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
ColumnNames_t FindUnknownColumns(const ColumnNames_t &requiredCols, const ColumnNames_t &datasetColumns, const RColumnRegister &definedCols, const ColumnNames_t &dataSourceColumns)
std::vector< std::string > ColumnNames_t
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.