Logo ROOT   6.14/05
Reference Guide
RDFInterface.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RDF_TINTERFACE
12 #define ROOT_RDF_TINTERFACE
13 
14 #include <stddef.h>
15 #include <algorithm>
16 #include <initializer_list>
17 #include <limits>
18 #include <memory>
19 #include <sstream>
20 #include <stdexcept>
21 #include <string>
22 #include <tuple>
23 #include <type_traits> // is_same, enable_if
24 #include <typeinfo>
25 #include <vector>
26 
28 #include "ROOT/RStringView.hxx"
29 #include "ROOT/RCutFlowReport.hxx"
31 #include "ROOT/RDFHistoModels.hxx"
33 #include "ROOT/RDFNodes.hxx"
34 #include "ROOT/RDFNodesUtils.hxx"
35 #include "ROOT/RDFUtils.hxx"
36 #include "ROOT/RDataSource.hxx"
37 #include "ROOT/RLazyDSImpl.hxx"
38 #include "ROOT/RResultPtr.hxx"
40 #include "ROOT/TypeTraits.hxx"
41 #include "RtypesCore.h" // for ULong64_t
42 #include "TAxis.h"
43 #include "TChain.h"
44 #include "TClassEdit.h"
45 #include "TDirectory.h"
46 #include "TError.h"
47 #include "TH1.h" // For Histo actions
48 #include "TH2.h" // For Histo actions
49 #include "TH3.h" // For Histo actions
50 #include "TInterpreter.h"
51 #include "TProfile.h" // For Histo actions
52 #include "TProfile2D.h" // For Histo actions
53 #include "TROOT.h" // IsImplicitMTEnabled
54 #include "TRegexp.h"
55 #include "TString.h"
56 #include "TTreeReader.h"
57 
58 class TH2D;
59 class TH3D;
60 class TProfile2D;
61 class TProfile;
62 namespace ROOT {
63 namespace Detail {
64 namespace RDF {
65 namespace TCCHelperTypes {
66 struct TNothing;
67 struct TSlot;
68 struct TSlotAndEntry;
69 } // namespace TCCHelperTypes
70 } // namespace RDF
71 } // namespace Detail
72 } // namespace ROOT
73 
74 namespace ROOT {
75 
76 // forward declarations
77 class RDataFrame;
78 
79 } // namespace ROOT
80 
81 namespace cling {
82 std::string printValue(ROOT::RDataFrame *tdf); // For a nice printing at the prompt
83 }
84 
85 namespace ROOT {
86 
87 namespace RDF {
88 namespace RDFDetail = ROOT::Detail::RDF;
90 namespace TTraits = ROOT::TypeTraits;
91 
92 /**
93 * \class ROOT::RDF::RInterface
94 * \ingroup dataframe
95 * \brief The public interface to the RDataFrame federation of classes
96 * \tparam T One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
97 */
98 template <typename Proxied, typename DataSource = void>
99 class RInterface {
100  using DS_t = DataSource;
101  using ColumnNames_t = RDFDetail::ColumnNames_t;
106  friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
107  template <typename T, typename W>
108  friend class RInterface;
109 
110  const std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
111  const std::weak_ptr<RLoopManager> fImplWeakPtr; ///< Weak pointer to the RLoopManager at the root of the graph.
112  ColumnNames_t fValidCustomColumns; ///< Names of columns `Define`d for this branch of the functional graph.
113  /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
114  RDataSource *const fDataSource = nullptr;
115  std::shared_ptr<const ColumnNames_t> fBranchNames; ///< Cache of the chain columns names
116 
117 public:
118  // Template conversion operator, meant to use to convert RInterfaces of certain node types to RInterfaces of base
119  // classes of those node types, e.g. RInterface<TFilter<F,P>> -> RInterface<TFilterBase>.
120  template <typename NewProxied>
122  {
123  static_assert(std::is_base_of<NewProxied, Proxied>::value,
124  "RInterface<T> can only be converted to RInterface<BaseOfT>");
125  return RInterface<NewProxied>(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fBranchNames, fDataSource);
126  }
127 
128  ////////////////////////////////////////////////////////////////////////////
129  /// \brief Copy-assignment operator for RInterface.
130  RInterface &operator=(const RInterface &) = default;
131 
132  ////////////////////////////////////////////////////////////////////////////
133  /// \brief Copy-ctor for RInterface.
134  RInterface(const RInterface &) = default;
135 
136  ////////////////////////////////////////////////////////////////////////////
137  /// \brief Move-ctor for RInterface.
138  RInterface(RInterface &&) = default;
139 
140  ////////////////////////////////////////////////////////////////////////////
141  /// \brief Only enabled when building a RInterface<RLoopManager>
142  template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0>
143  RInterface(const std::shared_ptr<Proxied> &proxied)
144  : fProxiedPtr(proxied), fImplWeakPtr(proxied), fValidCustomColumns(), fDataSource(proxied->GetDataSource())
145  {
146  AddDefaultColumns();
147  }
148 
149  ////////////////////////////////////////////////////////////////////////////
150  /// \brief Append a filter to the call graph.
151  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
152  /// signalling whether the event has passed the selection (true) or not (false).
153  /// \param[in] columns Names of the columns/branches in input to the filter function.
154  /// \param[in] name Optional name of this filter. See `Report`.
155  ///
156  /// Append a filter node at the point of the call graph corresponding to the
157  /// object this method is called on.
158  /// The callable `f` should not have side-effects (e.g. modification of an
159  /// external or static variable) to ensure correct results when implicit
160  /// multi-threading is active.
161  ///
162  /// RDataFrame only evaluates filters when necessary: if multiple filters
163  /// are chained one after another, they are executed in order and the first
164  /// one returning false causes the event to be discarded.
165  /// Even if multiple actions or transformations depend on the same filter,
166  /// it is executed once per entry. If its result is requested more than
167  /// once, the cached result is served.
168  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
170  Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
171  {
172  RDFInternal::CheckFilter(f);
173  auto loopManager = GetLoopManager();
174  using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
175  constexpr auto nColumns = ColTypes_t::list_size;
176  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
177  if (fDataSource)
178  RDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, *fDataSource,
179  std::make_index_sequence<nColumns>(), ColTypes_t());
180  using F_t = RDFDetail::RFilter<F, Proxied>;
181  auto FilterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, *fProxiedPtr, name);
182  loopManager->Book(FilterPtr);
183  return RInterface<F_t, DS_t>(FilterPtr, fImplWeakPtr, fValidCustomColumns, fBranchNames, fDataSource);
184  }
185 
186  ////////////////////////////////////////////////////////////////////////////
187  /// \brief Append a filter to the call graph.
188  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
189  /// signalling whether the event has passed the selection (true) or not (false).
190  /// \param[in] name Optional name of this filter. See `Report`.
191  ///
192  /// Refer to the first overload of this method for the full documentation.
193  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
195  {
196  // The sfinae is there in order to pick up the overloaded method which accepts two strings
197  // rather than this template method.
198  return Filter(f, {}, name);
199  }
200 
201  ////////////////////////////////////////////////////////////////////////////
202  /// \brief Append a filter to the call graph.
203  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
204  /// signalling whether the event has passed the selection (true) or not (false).
205  /// \param[in] columns Names of the columns/branches in input to the filter function.
206  ///
207  /// Refer to the first overload of this method for the full documentation.
208  template <typename F>
209  RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
210  {
211  return Filter(f, ColumnNames_t{columns});
212  }
213 
214  ////////////////////////////////////////////////////////////////////////////
215  /// \brief Append a filter to the call graph.
216  /// \param[in] expression The filter expression in C++
217  /// \param[in] name Optional name of this filter. See `Report`.
218  ///
219  /// The expression is just-in-time compiled and used to filter entries. It must
220  /// be valid C++ syntax in which variable names are substituted with the names
221  /// of branches/columns.
222  ///
223  /// Refer to the first overload of this method for the full documentation.
225  {
226  auto df = GetLoopManager();
227  const auto &aliasMap = df->GetAliasMap();
228  auto *const tree = df->GetTree();
230  const auto &customColumns = df->GetCustomColumnNames();
231 
232  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
233  RInterface<typename decltype(upcastNode)::element_type> upcastInterface(upcastNode, fImplWeakPtr,
234  fValidCustomColumns, fBranchNames, fDataSource);
235  const auto prevNodeTypeName = upcastInterface.GetNodeTypeName();
236  const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(df.get(), name);
237  RDFInternal::BookFilterJit(jittedFilter.get(), upcastNode.get(), prevNodeTypeName, name, expression, aliasMap,
238  branches, customColumns, tree, fDataSource, df->GetID());
239 
240  df->Book(jittedFilter);
241  return RInterface<RDFDetail::RJittedFilter, DS_t>(jittedFilter, fImplWeakPtr, fValidCustomColumns,
242  fBranchNames, fDataSource);
243  }
244 
245  // clang-format off
246  ////////////////////////////////////////////////////////////////////////////
247  /// \brief Creates a custom column
248  /// \param[in] name The name of the custom column.
249  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
250  /// \param[in] columns Names of the columns/branches in input to the producer function.
251  ///
252  /// Create a custom column that will be visible from all subsequent nodes
253  /// of the functional chain. The `expression` is only evaluated for entries that pass
254  /// all the preceding filters.
255  /// A new variable is created called `name`, accessible as if it was contained
256  /// in the dataset from subsequent transformations/actions.
257  ///
258  /// Use cases include:
259  ///
260  /// * caching the results of complex calculations for easy and efficient multiple access
261  /// * extraction of quantities of interest from complex objects
262  /// * column aliasing, i.e. changing the name of a branch/column
263  ///
264  /// An exception is thrown if the name of the new column is already in use.
265  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
266  RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
267  {
268  return DefineImpl<F, RDFDetail::TCCHelperTypes::TNothing>(name, std::move(expression), columns);
269  }
270  // clang-format on
271 
272  // clang-format off
273  ////////////////////////////////////////////////////////////////////////////
274  /// \brief Creates a custom column with a value dependent on the processing slot.
275  /// \param[in] name The name of the custom column.
276  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
277  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
278  ///
279  /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
280  /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
281  /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
282  /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
283  /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1.
284  ///
285  /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
286  /// ~~~{.cpp}
287  /// int function(unsigned int, double, double);
288  /// Define("x", function, {"tdfslot_", "column1", "column2"})
289  /// DefineSlot("x", function, {"column1", "column2"})
290  /// ~~~
291  ///
292  /// See Define for more information.
293  template <typename F>
294  RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
295  {
296  return DefineImpl<F, RDFDetail::TCCHelperTypes::TSlot>(name, std::move(expression), columns);
297  }
298  // clang-format on
299 
300  // clang-format off
301  ////////////////////////////////////////////////////////////////////////////
302  /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
303  /// \param[in] name The name of the custom column.
304  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
305  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
306  ///
307  /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
308  /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
309  /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
310  /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
311  /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1. The second parameter
312  /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
313  ///
314  /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
315  /// ~~~{.cpp}
316  /// int function(unsigned int, ULong64_t, double, double);
317  /// Define("x", function, {"tdfslot_", "tdfentry_", "column1", "column2"})
318  /// DefineSlotEntry("x", function, {"column1", "column2"})
319  /// ~~~
320  ///
321  /// See Define for more information.
322  template <typename F>
324  {
325  return DefineImpl<F, RDFDetail::TCCHelperTypes::TSlotAndEntry>(name, std::move(expression), columns);
326  }
327  // clang-format on
328 
329  ////////////////////////////////////////////////////////////////////////////
330  /// \brief Creates a custom column
331  /// \param[in] name The name of the custom column.
332  /// \param[in] expression An expression in C++ which represents the temporary value
333  ///
334  /// The expression is just-in-time compiled and used to produce the column entries.
335  /// It must be valid C++ syntax in which variable names are substituted with the names
336  /// of branches/columns.
337  ///
338  /// Refer to the first overload of this method for the full documentation.
340  {
341  auto lm = GetLoopManager();
342  // this check must be done before jitting lest we throw exceptions in jitted code
343  RDFInternal::CheckCustomColumn(name, lm->GetTree(), lm->GetCustomColumnNames(),
344  fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
345 
346  RDFInternal::BookDefineJit(name, expression, *lm, fDataSource);
347 
348  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, fImplWeakPtr, fValidCustomColumns,
349  fBranchNames, fDataSource);
350  newInterface.fValidCustomColumns.emplace_back(name);
351  return newInterface;
352  }
353 
354  ////////////////////////////////////////////////////////////////////////////
355  /// \brief Allow to refer to a column with a different name
356  /// \param[in] alias name of the column alias
357  /// \param[in] columnName of the column to be aliased
358  /// Aliasing an alias is supported.
360  {
361  // The symmetry with Define is clear. We want to:
362  // - Create globally the alias and return this very node, unchanged
363  // - Make aliases accessible based on chains and not globally
364  auto loopManager = GetLoopManager();
365 
366  // Helper to find out if a name is a column
367  auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
368 
369  // If the alias name is a column name, there is a problem
370  RDFInternal::CheckCustomColumn(alias, loopManager->GetTree(), fValidCustomColumns, dsColumnNames);
371 
372  const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
373 
374  loopManager->AddColumnAlias(std::string(alias), validColumnName);
375  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fBranchNames, fDataSource);
376  newInterface.fValidCustomColumns.emplace_back(alias);
377  return newInterface;
378  }
379 
380  ////////////////////////////////////////////////////////////////////////////
381  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
382  /// \tparam BranchTypes variadic list of branch/column types
383  /// \param[in] treename The name of the output TTree
384  /// \param[in] filename The name of the output TFile
385  /// \param[in] columnList The list of names of the columns/branches to be written
386  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
387  ///
388  /// This function returns a `RDataFrame` built with the output tree as a source.
389  template <typename... BranchTypes>
391  Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
392  const RSnapshotOptions &options = RSnapshotOptions())
393  {
394  return SnapshotImpl<BranchTypes...>(treename, filename, columnList, options);
395  }
396 
397  ////////////////////////////////////////////////////////////////////////////
398  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
399  /// \param[in] treename The name of the output TTree
400  /// \param[in] filename The name of the output TFile
401  /// \param[in] columnList The list of names of the columns/branches to be written
402  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
403  ///
404  /// This function returns a `RDataFrame` built with the output tree as a source.
405  /// The types of the columns are automatically inferred and do not need to be specified.
407  const ColumnNames_t &columnList,
408  const RSnapshotOptions &options = RSnapshotOptions())
409  {
410  auto df = GetLoopManager();
411 
412  // Early return: if the list of columns is empty, just return an empty RDF
413  // If we proceed, the jitted call will not compile!
414  if (columnList.empty()) {
415  auto nEntries = *this->Count();
416  auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries));
417  return MakeResultPtr(snapshotRDF, df, nullptr);
418  }
419  auto tree = df->GetTree();
420  const auto nsID = df->GetID();
421  std::stringstream snapCall;
422  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
423  RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, fImplWeakPtr,
424  fValidCustomColumns, fBranchNames, fDataSource);
425  // build a string equivalent to
426  // "(RInterface<nodetype*>*)(this)->Snapshot<Ts...>(treename,filename,*(ColumnNames_t*)(&columnList), options)"
427  // on Windows, to prefix the hexadecimal value of a pointer with '0x',
428  // one need to write: std::hex << std::showbase << (size_t)pointer
429  snapCall << "reinterpret_cast<ROOT::RDF::RInterface<" << upcastInterface.GetNodeTypeName() << ">*>(" << std::hex
430  << std::showbase << (size_t)&upcastInterface << ")->Snapshot<";
431 
432  const auto &customCols = df->GetCustomColumnNames();
433  const auto dontConvertVector = false;
434  const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
435  for (auto &c : validCols) {
436  const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
437  snapCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom, dontConvertVector)
438  << ", ";
439  };
440  if (!columnList.empty())
441  snapCall.seekp(-2, snapCall.cur); // remove the last ",
442  snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
443  << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
444  << std::hex << std::showbase << (size_t)&columnList << "),"
445  << "*reinterpret_cast<ROOT::RDF::RSnapshotOptions*>(" << std::hex << std::showbase << (size_t)&options
446  << "));";
447  // jit snapCall, return result
448  TInterpreter::EErrorCode errorCode;
449  auto newRDFPtr = gInterpreter->Calc(snapCall.str().c_str(), &errorCode);
450  if (TInterpreter::EErrorCode::kNoError != errorCode) {
451  std::string msg = "Cannot jit Snapshot call. Interpreter error code is " + std::to_string(errorCode) + ".";
452  throw std::runtime_error(msg);
453  }
454  return *reinterpret_cast<RResultPtr<RInterface<RLoopManager>> *>(newRDFPtr);
455  }
456 
457  // clang-format off
458  ////////////////////////////////////////////////////////////////////////////
459  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
460  /// \param[in] treename The name of the output TTree
461  /// \param[in] filename The name of the output TFile
462  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. See the documentation of TRegexp for more details. An empty string signals the selection of all columns.
463  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
464  ///
465  /// This function returns a `RDataFrame` built with the output tree as a source.
466  /// The types of the columns are automatically inferred and do not need to be specified.
468  std::string_view columnNameRegexp = "",
469  const RSnapshotOptions &options = RSnapshotOptions())
470  {
471  auto selectedColumns = ConvertRegexToColumns(columnNameRegexp, "Snapshot");
472  return Snapshot(treename, filename, selectedColumns, options);
473  }
474  // clang-format on
475 
476  // clang-format off
477  ////////////////////////////////////////////////////////////////////////////
478  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
479  /// \param[in] treename The name of the output TTree
480  /// \param[in] filename The name of the output TFile
481  /// \param[in] columnList The list of names of the columns/branches to be written
482  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
483  ///
484  /// This function returns a `RDataFrame` built with the output tree as a source.
485  /// The types of the columns are automatically inferred and do not need to be specified.
487  std::initializer_list<std::string> columnList,
488  const RSnapshotOptions &options = RSnapshotOptions())
489  {
490  ColumnNames_t selectedColumns(columnList);
491  return Snapshot(treename, filename, selectedColumns, options);
492  }
493  // clang-format on
494 
495  ////////////////////////////////////////////////////////////////////////////
496  /// \brief Save selected columns in memory
497  /// \param[in] columns to be cached in memory
498  ///
499  /// The content of the selected columns is saved in memory exploiting the functionality offered by
500  /// the Take action. No extra copy is carried out when serving cached data to the actions and
501  /// transformations requesting it.
502  template <typename... BranchTypes>
504  {
505  auto staticSeq = std::make_index_sequence<sizeof...(BranchTypes)>();
506  return CacheImpl<BranchTypes...>(columnList, staticSeq);
507  }
508 
509  ////////////////////////////////////////////////////////////////////////////
510  /// \brief Save selected columns in memory
511  /// \param[in] columns to be cached in memory
512  ///
513  /// The content of the selected columns is saved in memory exploiting the functionality offered by
514  /// the Take action. No extra copy is carried out when serving cached data to the actions and
515  /// transformations requesting it.
517  {
518  // Early return: if the list of columns is empty, just return an empty RDF
519  // If we proceed, the jitted call will not compile!
520  if (columnList.empty()) {
521  auto nEntries = *this->Count();
522  RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
523  return emptyRDF;
524  }
525 
526  auto df = GetLoopManager();
527  auto tree = df->GetTree();
528  const auto nsID = df->GetID();
529  std::stringstream snapCall;
530  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
531  RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, fImplWeakPtr,
532  fValidCustomColumns,
533  fBranchNames,
534  fDataSource);
535  // build a string equivalent to
536  // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
537  // on Windows, to prefix the hexadecimal value of a pointer with '0x',
538  // one need to write: std::hex << std::showbase << (size_t)pointer
539  snapCall << "reinterpret_cast<ROOT::RDF::RInterface<" << upcastInterface.GetNodeTypeName() << ">*>(" << std::hex
540  << std::showbase << (size_t)&upcastInterface << ")->Cache<";
541 
542  const auto &customCols = df->GetCustomColumnNames();
543  for (auto &c : columnList) {
544  const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
545  snapCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom) << ", ";
546  };
547  if (!columnList.empty())
548  snapCall.seekp(-2, snapCall.cur); // remove the last ",
549  snapCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
550  << std::hex << std::showbase << (size_t)&columnList << "));";
551  // jit snapCall, return result
552  TInterpreter::EErrorCode errorCode;
553  auto newRDFPtr = gInterpreter->Calc(snapCall.str().c_str(), &errorCode);
554  if (TInterpreter::EErrorCode::kNoError != errorCode) {
555  std::string msg = "Cannot jit Cache call. Interpreter error code is " + std::to_string(errorCode) + ".";
556  throw std::runtime_error(msg);
557  }
558  return *reinterpret_cast<RInterface<RLoopManager> *>(newRDFPtr);
559  }
560 
561  ////////////////////////////////////////////////////////////////////////////
562  /// \brief Save selected columns in memory
563  /// \param[in] a regular expression to select the columns
564  ///
565  /// The existing columns are matched against the regeular expression. If the string provided
566  /// is empty, all columns are selected.
568  {
569  auto selectedColumns = ConvertRegexToColumns(columnNameRegexp, "Cache");
570  return Cache(selectedColumns);
571  }
572 
573  // clang-format off
574  ////////////////////////////////////////////////////////////////////////////
575  /// \brief Creates a node that filters entries based on range: [begin, end)
576  /// \param[in] begin Initial entry number considered for this range.
577  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
578  /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
579  ///
580  /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
581  /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
582  // clang-format on
583  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
584  {
585  // check invariants
586  if (stride == 0 || (end != 0 && end < begin))
587  throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
589  throw std::runtime_error("Range was called with ImplicitMT enabled. Multi-thread ranges are not supported.");
590 
591  auto df = GetLoopManager();
593  auto RangePtr = std::make_shared<Range_t>(begin, end, stride, *fProxiedPtr);
594  df->Book(RangePtr);
595  RInterface<RDFDetail::RRange<Proxied>> tdf_r(RangePtr, fImplWeakPtr, fValidCustomColumns,
596  fBranchNames, fDataSource);
597  return tdf_r;
598  }
599 
600  // clang-format off
601  ////////////////////////////////////////////////////////////////////////////
602  /// \brief Creates a node that filters entries based on range
603  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
604  ///
605  /// See the other Range overload for a detailed description.
606  // clang-format on
607  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
608 
609  // clang-format off
610  ////////////////////////////////////////////////////////////////////////////
611  /// \brief Execute a user-defined function on each entry (*instant action*)
612  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
613  /// \param[in] columns Names of the columns/branches in input to the user function.
614  ///
615  /// The callable `f` is invoked once per entry. This is an *instant action*:
616  /// upon invocation, an event loop as well as execution of all scheduled actions
617  /// is triggered.
618  /// Users are responsible for the thread-safety of this callable when executing
619  /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
620  // clang-format on
621  template <typename F>
622  void Foreach(F f, const ColumnNames_t &columns = {})
623  {
625  using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
626  ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
627  }
628 
629  // clang-format off
630  ////////////////////////////////////////////////////////////////////////////
631  /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
632  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
633  /// \param[in] columns Names of the columns/branches in input to the user function.
634  ///
635  /// Same as `Foreach`, but the user-defined function takes an extra
636  /// `unsigned int` as its first parameter, the *processing slot index*.
637  /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
638  /// for each thread of execution.
639  /// This is meant as a helper in writing thread-safe `Foreach`
640  /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
641  /// The user-defined processing callable is able to follow different
642  /// *streams of processing* indexed by the first parameter.
643  /// `ForeachSlot` works just as well with single-thread execution: in that
644  /// case `slot` will always be `0`.
645  // clang-format on
646  template <typename F>
647  void ForeachSlot(F f, const ColumnNames_t &columns = {})
648  {
649  auto loopManager = GetLoopManager();
651  constexpr auto nColumns = ColTypes_t::list_size;
652  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
653  if (fDataSource)
654  RDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, *fDataSource,
655  std::make_index_sequence<nColumns>(), ColTypes_t());
656  using Helper_t = RDFInternal::ForeachSlotHelper<F>;
658  loopManager->Book(std::make_shared<Action_t>(Helper_t(std::move(f)), validColumnNames, *fProxiedPtr));
659  loopManager->Run();
660  }
661 
662  // clang-format off
663  ////////////////////////////////////////////////////////////////////////////
664  /// \brief Execute a user-defined reduce operation on the values of a column.
665  /// \tparam F The type of the reduce callable. Automatically deduced.
666  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
667  /// \param[in] f A callable with signature `T(T,T)`
668  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
669  ///
670  /// A reduction takes two values of a column and merges them into one (e.g.
671  /// by summing them, taking the maximum, etc). This action performs the
672  /// specified reduction operation on all processed column values, returning
673  /// a single value of the same type. The callable f must satisfy the general
674  /// requirements of a *processing function* besides having signature `T(T,T)`
675  /// where `T` is the type of column columnName.
676  ///
677  /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
678  /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
679  /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
680  /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
681  /// overload.
682  ///
683  /// This action is *lazy*: upon invocation of this method the calculation is
684  /// booked but not executed. See RResultPtr documentation.
685  // clang-format on
686  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
688  {
689  static_assert(
690  std::is_default_constructible<T>::value,
691  "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
692  return Reduce(std::move(f), columnName, T());
693  }
694 
695  ////////////////////////////////////////////////////////////////////////////
696  /// \brief Execute a user-defined reduce operation on the values of a column.
697  /// \tparam F The type of the reduce callable. Automatically deduced.
698  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
699  /// \param[in] f A callable with signature `T(T,T)`
700  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
701  /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
702  ///
703  /// See the description of the first Reduce overload for more information.
704  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
705  RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
706  {
707  return Aggregate(f, f, columnName, redIdentity);
708  }
709 
710  ////////////////////////////////////////////////////////////////////////////
711  /// \brief Return the number of entries processed (*lazy action*)
712  ///
713  /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
714  /// This action is *lazy*: upon invocation of this method the calculation is
715  /// booked but not executed. See RResultPtr documentation.
717  {
718  auto df = GetLoopManager();
719  const auto nSlots = df->GetNSlots();
720  auto cSPtr = std::make_shared<ULong64_t>(0);
721  using Helper_t = RDFInternal::CountHelper;
723  auto action = std::make_shared<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), *fProxiedPtr);
724  df->Book(action);
725  return MakeResultPtr(cSPtr, df, action.get());
726  }
727 
728  ////////////////////////////////////////////////////////////////////////////
729  /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
730  /// \tparam T The type of the column.
731  /// \tparam COLL The type of collection used to store the values.
732  /// \param[in] column The name of the column to collect the values of.
733  ///
734  /// The collection type to be specified for C-style array columns is `RVec<T>`.
735  /// This action is *lazy*: upon invocation of this method the calculation is
736  /// booked but not executed. See RResultPtr documentation.
737  template <typename T, typename COLL = std::vector<T>>
739  {
740  auto loopManager = GetLoopManager();
741  const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
742  const auto validColumnNames = GetValidatedColumnNames(1, columns);
743  if (fDataSource)
744  RDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, *fDataSource,
745  std::make_index_sequence<1>(), TTraits::TypeList<T>());
746 
747  using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
749  auto valuesPtr = std::make_shared<COLL>();
750  const auto nSlots = loopManager->GetNSlots();
751  auto action = std::make_shared<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, *fProxiedPtr);
752  loopManager->Book(action);
753  return MakeResultPtr(valuesPtr, loopManager, action.get());
754  }
755 
756  ////////////////////////////////////////////////////////////////////////////
757  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
758  /// \tparam V The type of the column used to fill the histogram.
759  /// \param[in] model The returned histogram will be constructed using this as a model.
760  /// \param[in] vName The name of the column that will fill the histogram.
761  ///
762  /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
763  /// is filled with each one of the elements of the container. In case multiple columns of container type
764  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
765  /// possibly different lengths between events).
766  /// This action is *lazy*: upon invocation of this method the calculation is
767  /// booked but not executed. See RResultPtr documentation.
768  /// The user gives up ownership of the model histogram.
769  template <typename V = RDFDetail::TInferType>
770  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
771  {
772  const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
773  std::shared_ptr<::TH1D> h(nullptr);
774  {
775  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
776  h = model.GetHistogram();
777  h->SetDirectory(nullptr);
778  }
779 
780  if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
781  RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
782  return CreateAction<RDFInternal::ActionTypes::Histo1D, V>(userColumns, h);
783  }
784 
785  template <typename V = RDFDetail::TInferType>
787  {
788  return Histo1D<V>({"", "", 128u, 0., 0.}, vName);
789  }
790 
791  ////////////////////////////////////////////////////////////////////////////
792  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
793  /// \tparam V The type of the column used to fill the histogram.
794  /// \tparam W The type of the column used as weights.
795  /// \param[in] model The returned histogram will be constructed using this as a model.
796  /// \param[in] vName The name of the column that will fill the histogram.
797  /// \param[in] wName The name of the column that will provide the weights.
798  ///
799  /// See the description of the first Histo1D overload for more details.
800  template <typename V = RDFDetail::TInferType, typename W = RDFDetail::TInferType>
802  {
803  const std::vector<std::string_view> columnViews = {vName, wName};
804  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
805  ? ColumnNames_t()
806  : ColumnNames_t(columnViews.begin(), columnViews.end());
807  std::shared_ptr<::TH1D> h(nullptr);
808  {
809  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
810  h = model.GetHistogram();
811  }
812  return CreateAction<RDFInternal::ActionTypes::Histo1D, V, W>(userColumns, h);
813  }
814 
815  ////////////////////////////////////////////////////////////////////////////
816  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
817  /// \tparam V The type of the column used to fill the histogram.
818  /// \tparam W The type of the column used as weights.
819  /// \param[in] vName The name of the column that will fill the histogram.
820  /// \param[in] wName The name of the column that will provide the weights.
821  ///
822  /// This overload uses a default model histogram TH1D("", "", 128u, 0., 0.).
823  /// See the description of the first Histo1D overload for more details.
824  template <typename V = RDFDetail::TInferType, typename W = RDFDetail::TInferType>
826  {
827  return Histo1D<V, W>({"", "", 128u, 0., 0.}, vName, wName);
828  }
829 
830  ////////////////////////////////////////////////////////////////////////////
831  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
832  /// \tparam V The type of the column used to fill the histogram.
833  /// \tparam W The type of the column used as weights.
834  /// \param[in] model The returned histogram will be constructed using this as a model.
835  ///
836  /// This overload will use the first two default columns as column names.
837  /// See the description of the first Histo1D overload for more details.
838  template <typename V, typename W>
839  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
840  {
841  return Histo1D<V, W>(model, "", "");
842  }
843 
844  ////////////////////////////////////////////////////////////////////////////
845  /// \brief Fill and return a two-dimensional histogram (*lazy action*)
846  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
847  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
848  /// \param[in] model The returned histogram will be constructed using this as a model.
849  /// \param[in] v1Name The name of the column that will fill the x axis.
850  /// \param[in] v2Name The name of the column that will fill the y axis.
851  ///
852  /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
853  /// is filled with each one of the elements of the container. In case multiple columns of container type
854  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
855  /// possibly different lengths between events).
856  /// This action is *lazy*: upon invocation of this method the calculation is
857  /// booked but not executed. See RResultPtr documentation.
858  /// The user gives up ownership of the model histogram.
859  template <typename V1 = RDFDetail::TInferType, typename V2 = RDFDetail::TInferType>
861  {
862  std::shared_ptr<::TH2D> h(nullptr);
863  {
864  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
865  h = model.GetHistogram();
866  }
867  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
868  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
869  }
870  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
871  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
872  ? ColumnNames_t()
873  : ColumnNames_t(columnViews.begin(), columnViews.end());
874  return CreateAction<RDFInternal::ActionTypes::Histo2D, V1, V2>(userColumns, h);
875  }
876 
877  ////////////////////////////////////////////////////////////////////////////
878  /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
879  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
880  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
881  /// \tparam W The type of the column used for the weights of the histogram.
882  /// \param[in] model The returned histogram will be constructed using this as a model.
883  /// \param[in] v1Name The name of the column that will fill the x axis.
884  /// \param[in] v2Name The name of the column that will fill the y axis.
885  /// \param[in] wName The name of the column that will provide the weights.
886  ///
887  /// This action is *lazy*: upon invocation of this method the calculation is
888  /// booked but not executed. See RResultPtr documentation.
889  /// The user gives up ownership of the model histogram.
890  template <typename V1 = RDFDetail::TInferType, typename V2 = RDFDetail::TInferType,
891  typename W = RDFDetail::TInferType>
894  {
895  std::shared_ptr<::TH2D> h(nullptr);
896  {
897  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
898  h = model.GetHistogram();
899  }
900  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
901  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
902  }
903  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
904  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
905  ? ColumnNames_t()
906  : ColumnNames_t(columnViews.begin(), columnViews.end());
907  return CreateAction<RDFInternal::ActionTypes::Histo2D, V1, V2, W>(userColumns, h);
908  }
909 
910  template <typename V1, typename V2, typename W>
912  {
913  return Histo2D<V1, V2, W>(model, "", "", "");
914  }
915 
916  ////////////////////////////////////////////////////////////////////////////
917  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
918  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
919  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
920  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
921  /// \param[in] model The returned histogram will be constructed using this as a model.
922  /// \param[in] v1Name The name of the column that will fill the x axis.
923  /// \param[in] v2Name The name of the column that will fill the y axis.
924  /// \param[in] v3Name The name of the column that will fill the z axis.
925  ///
926  /// This action is *lazy*: upon invocation of this method the calculation is
927  /// booked but not executed. See RResultPtr documentation.
928  /// The user gives up ownership of the model histogram.
929  template <typename V1 = RDFDetail::TInferType, typename V2 = RDFDetail::TInferType,
930  typename V3 = RDFDetail::TInferType>
932  std::string_view v3Name = "")
933  {
934  std::shared_ptr<::TH3D> h(nullptr);
935  {
936  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
937  h = model.GetHistogram();
938  }
939  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
940  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
941  }
942  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
943  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
944  ? ColumnNames_t()
945  : ColumnNames_t(columnViews.begin(), columnViews.end());
946  return CreateAction<RDFInternal::ActionTypes::Histo3D, V1, V2, V3>(userColumns, h);
947  }
948 
949  ////////////////////////////////////////////////////////////////////////////
950  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
951  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
952  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
953  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
954  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
955  /// \param[in] model The returned histogram will be constructed using this as a model.
956  /// \param[in] v1Name The name of the column that will fill the x axis.
957  /// \param[in] v2Name The name of the column that will fill the y axis.
958  /// \param[in] v3Name The name of the column that will fill the z axis.
959  /// \param[in] wName The name of the column that will provide the weights.
960  ///
961  /// This action is *lazy*: upon invocation of this method the calculation is
962  /// booked but not executed. See RResultPtr documentation.
963  /// The user gives up ownership of the model histogram.
964  template <typename V1 = RDFDetail::TInferType, typename V2 = RDFDetail::TInferType,
965  typename V3 = RDFDetail::TInferType, typename W = RDFDetail::TInferType>
967  std::string_view v3Name, std::string_view wName)
968  {
969  std::shared_ptr<::TH3D> h(nullptr);
970  {
971  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
972  h = model.GetHistogram();
973  }
974  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
975  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
976  }
977  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
978  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
979  ? ColumnNames_t()
980  : ColumnNames_t(columnViews.begin(), columnViews.end());
981  return CreateAction<RDFInternal::ActionTypes::Histo3D, V1, V2, V3, W>(userColumns, h);
982  }
983 
984  template <typename V1, typename V2, typename V3, typename W>
986  {
987  return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
988  }
989 
990  ////////////////////////////////////////////////////////////////////////////
991  /// \brief Fill and return a one-dimensional profile (*lazy action*)
992  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
993  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
994  /// \param[in] model The model to be considered to build the new return value.
995  /// \param[in] v1Name The name of the column that will fill the x axis.
996  /// \param[in] v2Name The name of the column that will fill the y axis.
997  ///
998  /// This action is *lazy*: upon invocation of this method the calculation is
999  /// booked but not executed. See RResultPtr documentation.
1000  /// The user gives up ownership of the model profile object.
1001  template <typename V1 = RDFDetail::TInferType, typename V2 = RDFDetail::TInferType>
1004  {
1005  std::shared_ptr<::TProfile> h(nullptr);
1006  {
1007  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1008  h = model.GetProfile();
1009  }
1010 
1011  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1012  throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1013  }
1014  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1015  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1016  ? ColumnNames_t()
1017  : ColumnNames_t(columnViews.begin(), columnViews.end());
1018  return CreateAction<RDFInternal::ActionTypes::Profile1D, V1, V2>(userColumns, h);
1019  }
1020 
1021  ////////////////////////////////////////////////////////////////////////////
1022  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1023  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1024  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1025  /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1026  /// \param[in] model The model to be considered to build the new return value.
1027  /// \param[in] v1Name The name of the column that will fill the x axis.
1028  /// \param[in] v2Name The name of the column that will fill the y axis.
1029  /// \param[in] wName The name of the column that will provide the weights.
1030  ///
1031  /// This action is *lazy*: upon invocation of this method the calculation is
1032  /// booked but not executed. See RResultPtr documentation.
1033  /// The user gives up ownership of the model profile object.
1034  template <typename V1 = RDFDetail::TInferType, typename V2 = RDFDetail::TInferType,
1035  typename W = RDFDetail::TInferType>
1038  {
1039  std::shared_ptr<::TProfile> h(nullptr);
1040  {
1041  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1042  h = model.GetProfile();
1043  }
1044 
1045  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1046  throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1047  }
1048  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1049  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1050  ? ColumnNames_t()
1051  : ColumnNames_t(columnViews.begin(), columnViews.end());
1052  return CreateAction<RDFInternal::ActionTypes::Profile1D, V1, V2, W>(userColumns, h);
1053  }
1054 
1055  template <typename V1, typename V2, typename W>
1057  {
1058  return Profile1D<V1, V2, W>(model, "", "", "");
1059  }
1060 
1061  ////////////////////////////////////////////////////////////////////////////
1062  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1063  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1064  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1065  /// \tparam V2 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1066  /// \param[in] model The returned profile will be constructed using this as a model.
1067  /// \param[in] v1Name The name of the column that will fill the x axis.
1068  /// \param[in] v2Name The name of the column that will fill the y axis.
1069  /// \param[in] v3Name The name of the column that will fill the z axis.
1070  ///
1071  /// This action is *lazy*: upon invocation of this method the calculation is
1072  /// booked but not executed. See RResultPtr documentation.
1073  /// The user gives up ownership of the model profile.
1074  template <typename V1 = RDFDetail::TInferType, typename V2 = RDFDetail::TInferType,
1075  typename V3 = RDFDetail::TInferType>
1077  std::string_view v2Name = "", std::string_view v3Name = "")
1078  {
1079  std::shared_ptr<::TProfile2D> h(nullptr);
1080  {
1081  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1082  h = model.GetProfile();
1083  }
1084 
1085  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1086  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1087  }
1088  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1089  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1090  ? ColumnNames_t()
1091  : ColumnNames_t(columnViews.begin(), columnViews.end());
1092  return CreateAction<RDFInternal::ActionTypes::Profile2D, V1, V2, V3>(userColumns, h);
1093  }
1094 
1095  ////////////////////////////////////////////////////////////////////////////
1096  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1097  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1098  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1099  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1100  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1101  /// \param[in] model The returned histogram will be constructed using this as a model.
1102  /// \param[in] v1Name The name of the column that will fill the x axis.
1103  /// \param[in] v2Name The name of the column that will fill the y axis.
1104  /// \param[in] v3Name The name of the column that will fill the z axis.
1105  /// \param[in] wName The name of the column that will provide the weights.
1106  ///
1107  /// This action is *lazy*: upon invocation of this method the calculation is
1108  /// booked but not executed. See RResultPtr documentation.
1109  /// The user gives up ownership of the model profile.
1110  template <typename V1 = RDFDetail::TInferType, typename V2 = RDFDetail::TInferType,
1111  typename V3 = RDFDetail::TInferType, typename W = RDFDetail::TInferType>
1113  std::string_view v3Name, std::string_view wName)
1114  {
1115  std::shared_ptr<::TProfile2D> h(nullptr);
1116  {
1117  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1118  h = model.GetProfile();
1119  }
1120 
1121  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1122  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1123  }
1124  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1125  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1126  ? ColumnNames_t()
1127  : ColumnNames_t(columnViews.begin(), columnViews.end());
1128  return CreateAction<RDFInternal::ActionTypes::Profile2D, V1, V2, V3, W>(userColumns, h);
1129  }
1130 
1131  template <typename V1, typename V2, typename V3, typename W>
1133  {
1134  return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1135  }
1136 
1137  ////////////////////////////////////////////////////////////////////////////
1138  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1139  ///
1140  /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1141  /// as the column names pass as columnList. The arguments of `T::Fill` must have type equal to the one of the
1142  /// specified columns (these types are passed as template parameters to this method).
1143  /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1144  /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1145  /// \tparam T The type of the object to fill. Automatically deduced.
1146  /// \param[in] model The model to be considered to build the new return value.
1147  /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1148  ///
1149  /// The user gives up ownership of the model object.
1150  /// The list of column names to be used for filling must always be specified.
1151  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1152  /// See RResultPtr documentation.
1153  template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1154  RResultPtr<T> Fill(T &&model, const ColumnNames_t &columnList)
1155  {
1156  auto h = std::make_shared<T>(std::move(model));
1157  if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1158  throw std::runtime_error("The absence of axes limits is not supported yet.");
1159  }
1160  return CreateAction<RDFInternal::ActionTypes::Fill, FirstColumn, OtherColumns...>(columnList, h);
1161  }
1162 
1163  ////////////////////////////////////////////////////////////////////////////
1164  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1165  ///
1166  /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1167  /// method with these types. See previous overload for more information.
1168  /// \tparam T The type of the object to fill. Automatically deduced.
1169  /// \param[in] model The model to be considered to build the new return value.
1170  /// \param[in] columnList The name of the columns read to fill the object.
1171  ///
1172  /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1173  /// previous overload. Check the previous overload for more details on `Fill`.
1174  template <typename T>
1176  {
1177  auto h = std::make_shared<T>(std::move(model));
1178  if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1179  throw std::runtime_error("The absence of axes limits is not supported yet.");
1180  }
1181  return CreateAction<RDFInternal::ActionTypes::Fill, RDFDetail::TInferType>(bl, h, bl.size());
1182  }
1183 
1184  ////////////////////////////////////////////////////////////////////////////
1185  /// \brief Return the minimum of processed column values (*lazy action*)
1186  /// \tparam T The type of the branch/column.
1187  /// \param[in] columnName The name of the branch/column to be treated.
1188  ///
1189  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1190  /// template specialization of this method.
1191  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1192  ///
1193  /// This action is *lazy*: upon invocation of this method the calculation is
1194  /// booked but not executed. See RResultPtr documentation.
1195  template <typename T = RDFDetail::TInferType>
1197  {
1198  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1199  using RetType_t = RDFDetail::MinReturnType_t<T>;
1200  auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1201  return CreateAction<RDFInternal::ActionTypes::Min, T>(userColumns, minV);
1202  }
1203 
1204  ////////////////////////////////////////////////////////////////////////////
1205  /// \brief Return the maximum of processed column values (*lazy action*)
1206  /// \tparam T The type of the branch/column.
1207  /// \param[in] columnName The name of the branch/column to be treated.
1208  ///
1209  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1210  /// template specialization of this method.
1211  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1212  ///
1213  /// This action is *lazy*: upon invocation of this method the calculation is
1214  /// booked but not executed. See RResultPtr documentation.
1215  template <typename T = RDFDetail::TInferType>
1217  {
1218  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1219  using RetType_t = RDFDetail::MaxReturnType_t<T>;
1220  auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1221  return CreateAction<RDFInternal::ActionTypes::Max, T>(userColumns, maxV);
1222  }
1223 
1224  ////////////////////////////////////////////////////////////////////////////
1225  /// \brief Return the mean of processed column values (*lazy action*)
1226  /// \tparam T The type of the branch/column.
1227  /// \param[in] columnName The name of the branch/column to be treated.
1228  ///
1229  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1230  /// template specialization of this method.
1231  ///
1232  /// This action is *lazy*: upon invocation of this method the calculation is
1233  /// booked but not executed. See RResultPtr documentation.
1234  template <typename T = RDFDetail::TInferType>
1236  {
1237  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1238  auto meanV = std::make_shared<double>(0);
1239  return CreateAction<RDFInternal::ActionTypes::Mean, T>(userColumns, meanV);
1240  }
1241 
1242  // clang-format off
1243  ////////////////////////////////////////////////////////////////////////////
1244  /// \brief Return the sum of processed column values (*lazy action*)
1245  /// \tparam T The type of the branch/column.
1246  /// \param[in] columnName The name of the branch/column.
1247  /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1248  ///
1249  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1250  /// template specialization of this method.
1251  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1252  ///
1253  /// This action is *lazy*: upon invocation of this method the calculation is
1254  /// booked but not executed. See RResultPtr documentation.
1255  template <typename T = RDFDetail::TInferType>
1257  Sum(std::string_view columnName = "",
1258  const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1259  {
1260  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1261  auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1262  return CreateAction<RDFInternal::ActionTypes::Sum, T>(userColumns, sumV);
1263  }
1264  // clang-format on
1265 
1266  ////////////////////////////////////////////////////////////////////////////
1267  /// \brief Gather filtering statistics
1268  ///
1269  /// Calling `Report` on the main `RDataFrame` object gathers stats for
1270  /// all named filters in the call graph. Calling this method on a
1271  /// stored chain state (i.e. a graph node different from the first) gathers
1272  /// the stats for all named filters in the chain section between the original
1273  /// `RDataFrame` and that node (included). Stats are gathered in the same
1274  /// order as the named filters have been added to the graph.
1275  /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1276  /// effects cuts had.
1277  ///
1278  /// This action is *lazy*: upon invocation of
1279  /// this method the calculation is booked but not executed. See RResultPtr
1280  /// documentation.
1281 
1283  {
1284  bool returnEmptyReport = false;
1285  // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1286  // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1287  // certainly does not contain named filters.
1288  // The number 2 takes into account the implicit columns for entry and slot number
1289  if (std::is_same<Proxied, RLoopManager>::value && fValidCustomColumns.size() > 2)
1290  returnEmptyReport = true;
1291 
1292  auto lm = GetLoopManager();
1293  auto rep = std::make_shared<RCutFlowReport>();
1294  using Helper_t = RDFInternal::ReportHelper<Proxied>;
1295  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1296  auto action =
1297  std::make_shared<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}), *fProxiedPtr);
1298  lm->Book(action);
1299  return MakeResultPtr(rep, lm, action.get());
1300  }
1301 
1302  /////////////////////////////////////////////////////////////////////////////
1303  /// \brief Returns the names of the available columns
1304  ///
1305  /// This is not an action nor a transformation, just a simple utility to
1306  /// get columns names out of the RDataFrame nodes.
1308  {
1309  ColumnNames_t allColumns;
1310 
1311  auto addIfNotInternal = [&allColumns](std::string_view colName) {
1312  if (!RDFInternal::IsInternalColumn(colName))
1313  allColumns.emplace_back(colName);
1314  };
1315 
1316  std::for_each(fValidCustomColumns.begin(), fValidCustomColumns.end(), addIfNotInternal);
1317 
1318  auto df = GetLoopManager();
1319  auto tree = df->GetTree();
1320  if (tree) {
1321  auto branchNames = RDFInternal::GetBranchNames(*tree);
1322  allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1323  }
1324 
1325  if (fDataSource) {
1326  auto &dsColNames = fDataSource->GetColumnNames();
1327  allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1328  }
1329 
1330  return allColumns;
1331  }
1332 
1333  // clang-format off
1334  ////////////////////////////////////////////////////////////////////////////
1335  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1336  /// \tparam F The type of the aggregator callable. Automatically deduced.
1337  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1338  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1339  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
1340  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1341  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1342  /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
1343  ///
1344  /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
1345  /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
1346  /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
1347  /// the value of the column columnName.
1348  /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
1349  /// Otherwise the signature of aggregator must be `void(U&,T)`.
1350  ///
1351  /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
1352  /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
1353  /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
1354  ///
1355  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
1356  // clang-format on
1357  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
1358  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
1359  typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
1362  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
1363  {
1364  RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
1365  auto loopManager = GetLoopManager();
1366  const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1367  constexpr auto nColumns = ArgTypes::list_size;
1368  const auto validColumnNames = GetValidatedColumnNames(1, columns);
1369  if (fDataSource)
1370  RDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, *fDataSource,
1371  std::make_index_sequence<nColumns>(), ArgTypes());
1372  auto accObjPtr = std::make_shared<U>(aggIdentity);
1373  using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
1374  using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
1375  auto action = std::make_shared<Action_t>(
1376  Helper_t(std::move(aggregator), std::move(merger), accObjPtr, loopManager->GetNSlots()), validColumnNames,
1377  *fProxiedPtr);
1378  loopManager->Book(action);
1379  return MakeResultPtr(accObjPtr, loopManager, action.get());
1380  }
1381 
1382  // clang-format off
1383  ////////////////////////////////////////////////////////////////////////////
1384  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1385  /// \tparam F The type of the aggregator callable. Automatically deduced.
1386  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1387  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1388  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable
1389  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1390  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1391  ///
1392  /// See previous Aggregate overload for more information.
1393  // clang-format on
1394  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
1395  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
1398  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
1399  {
1400  static_assert(
1401  std::is_default_constructible<U>::value,
1402  "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
1403  return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
1404  }
1405 
1406  // clang-format off
1407  ////////////////////////////////////////////////////////////////////////////
1408  /// \brief Book execution of a custom action using a user-defined helper object.
1409  /// \tparam ColumnTypes List of types of columns used by this action.
1410  /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
1411  ///
1412  /// This method books a custom action for execution. The behavior of the action is completely dependent on the
1413  /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
1414  /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
1415  ///
1416  /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
1417  /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
1418  /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
1419  /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
1420  /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
1421  /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
1422  /// the requested columns for the particular entry being processed.
1423  /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
1424  /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
1425  /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
1426  /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations.
1427  // Can be no-op.
1428  /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents
1429  /// of the result.
1430  /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
1431  /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
1432  /// method concurrently, but will always pass different 'slot' numbers.
1433  /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
1434  /// Result_t). The RResultPtr returned by Book will point to this object.
1435  ///
1436  /// See $ROOTSYS/tree/treeplayer/inc/ROOT/RDFActionHelpers.hxx for the helpers used by standard RDF actions.
1437  // clang-format on
1438  template <typename... ColumnTypes, typename Helper>
1440  {
1441  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columns.size());
1442 
1443  // TODO add more static sanity checks on Helper
1444  using AH = RDFDetail::RActionImpl<Helper>;
1445  static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
1446  "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
1447  auto lm = GetLoopManager();
1448  using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>;
1449  auto resPtr = h.GetResultPtr();
1450  auto action = std::make_shared<Action_t>(Helper(std::forward<Helper>(h)), columns, *fProxiedPtr);
1451  lm->Book(action);
1452  return MakeResultPtr(resPtr, lm, action.get());
1453  }
1454 
1455 private:
1457  {
1458  auto lm = GetLoopManager();
1459  ColumnNames_t validColNames = {};
1460 
1461  // Entry number column
1462  const auto entryColName = "tdfentry_";
1463  auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
1464  DefineImpl<decltype(entryColGen), RDFDetail::TCCHelperTypes::TSlotAndEntry>(entryColName, std::move(entryColGen),
1465  {});
1466  fValidCustomColumns.emplace_back(entryColName);
1467 
1468  // Slot number column
1469  const auto slotColName = "tdfslot_";
1470  auto slotColGen = [](unsigned int slot) { return slot; };
1471  DefineImpl<decltype(slotColGen), RDFDetail::TCCHelperTypes::TSlot>(slotColName, std::move(slotColGen), {});
1472  fValidCustomColumns.emplace_back(slotColName);
1473  }
1474 
1476  {
1477  const auto theRegexSize = columnNameRegexp.size();
1478  std::string theRegex(columnNameRegexp);
1479 
1480  const auto isEmptyRegex = 0 == theRegexSize;
1481  // This is to avoid cases where branches called b1, b2, b3 are all matched by expression "b"
1482  if (theRegexSize > 0 && theRegex[0] != '^')
1483  theRegex = "^" + theRegex;
1484  if (theRegexSize > 0 && theRegex[theRegexSize - 1] != '$')
1485  theRegex = theRegex + "$";
1486 
1487  ColumnNames_t selectedColumns;
1488  selectedColumns.reserve(32);
1489 
1490  // Since we support gcc48 and it does not provide in its stl std::regex,
1491  // we need to use TRegexp
1492  TRegexp regexp(theRegex);
1493  int dummy;
1494  for (auto &&branchName : fValidCustomColumns) {
1495  if ((isEmptyRegex || -1 != regexp.Index(branchName.c_str(), &dummy)) &&
1497  selectedColumns.emplace_back(branchName);
1498  }
1499  }
1500 
1501  auto df = GetLoopManager();
1502  auto tree = df->GetTree();
1503  if (tree) {
1504  auto branchNames = RDFInternal::GetTopLevelBranchNames(*tree);
1505  for (auto &branchName : branchNames) {
1506  if (isEmptyRegex || -1 != regexp.Index(branchName, &dummy)) {
1507  selectedColumns.emplace_back(branchName);
1508  }
1509  }
1510  }
1511 
1512  if (fDataSource) {
1513  auto &dsColNames = fDataSource->GetColumnNames();
1514  for (auto &dsColName : dsColNames) {
1515  if ((isEmptyRegex || -1 != regexp.Index(dsColName.c_str(), &dummy)) &&
1516  !RDFInternal::IsInternalColumn(dsColName)) {
1517  selectedColumns.emplace_back(dsColName);
1518  }
1519  }
1520  }
1521 
1522  if (selectedColumns.empty()) {
1523  std::string text(callerName);
1524  if (columnNameRegexp.empty()) {
1525  text = ": there is no column available to match.";
1526  } else {
1527  text = ": regex \"" + columnNameRegexp + "\" did not match any column.";
1528  }
1529  throw std::runtime_error(text);
1530  }
1531  return selectedColumns;
1532  }
1533 
1534  /// Return string containing fully qualified type name of the node pointed by fProxied.
1535  /// The method is only defined for RInterface<{RFilterBase,RCustomColumnBase,RRangeBase,RLoopManager}> as it should
1536  /// only be called on "upcast" RInterfaces.
1537  inline static std::string GetNodeTypeName();
1538 
1539  // Type was specified by the user, no need to infer it
1540  template <typename ActionType, typename... BranchTypes, typename ActionResultType,
1541  typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1542  RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
1543  {
1544  auto lm = GetLoopManager();
1545  constexpr auto nColumns = sizeof...(BranchTypes);
1546  const auto selectedCols = GetValidatedColumnNames(nColumns, columns);
1547  if (fDataSource)
1548  RDFInternal::DefineDataSourceColumns(selectedCols, *lm, *fDataSource, std::make_index_sequence<nColumns>(),
1549  RDFInternal::TypeList<BranchTypes...>());
1550  const auto nSlots = lm->GetNSlots();
1551  auto actionPtr =
1552  RDFInternal::BuildAndBook<BranchTypes...>(selectedCols, r, nSlots, *lm, *fProxiedPtr, (ActionType *)nullptr);
1553  return MakeResultPtr(r, lm, actionPtr);
1554  }
1555 
1556  // User did not specify type, do type inference
1557  // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
1558  // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
1559  template <typename ActionType, typename... BranchTypes, typename ActionResultType,
1560  typename std::enable_if<RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
1562  CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
1563  {
1564  auto lm = GetLoopManager();
1565  auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
1566  const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
1567  const unsigned int nSlots = lm->GetNSlots();
1568  const auto &customColumns = lm->GetCustomColumnNames();
1569  auto tree = lm->GetTree();
1570  auto rOnHeap = RDFInternal::MakeSharedOnHeap(r);
1571  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1573  upcastNode, fImplWeakPtr, fValidCustomColumns, fBranchNames, fDataSource);
1574  auto resultProxyAndActionPtrPtr = MakeResultPtr(r, lm);
1575  auto &resultProxy = resultProxyAndActionPtrPtr.first;
1576  auto actionPtrPtrOnHeap = RDFInternal::MakeSharedOnHeap(resultProxyAndActionPtrPtr.second);
1577  auto toJit =
1578  RDFInternal::JitBuildAndBook(validColumnNames, upcastInterface.GetNodeTypeName(), upcastNode.get(),
1579  typeid(std::shared_ptr<ActionResultType>), typeid(ActionType), rOnHeap, tree,
1580  nSlots, customColumns, fDataSource, actionPtrPtrOnHeap, lm->GetID());
1581  lm->ToJit(toJit);
1582  return resultProxy;
1583  }
1584 
1585  template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
1586  typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
1587  DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
1588  {
1589  auto loopManager = GetLoopManager();
1590  RDFInternal::CheckCustomColumn(name, loopManager->GetTree(), loopManager->GetCustomColumnNames(),
1591  fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
1592 
1593  using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
1594  using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
1595  std::is_same<CustomColumnType, RDFDetail::TCCHelperTypes::TSlot>::value, ArgTypes_t>::type;
1596  using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
1597  std::is_same<CustomColumnType, RDFDetail::TCCHelperTypes::TSlotAndEntry>::value, ColTypesTmp_t>::type;
1598 
1599  constexpr auto nColumns = ColTypes_t::list_size;
1600  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1601  if (fDataSource)
1602  RDFInternal::DefineDataSourceColumns(validColumnNames, *loopManager, *fDataSource,
1603  std::make_index_sequence<nColumns>(), ColTypes_t());
1605 
1606  // Declare return type to the interpreter, for future use by jitted actions
1607  auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
1608  std::string retTypeNameFwdDecl; // different from "" only if the type does not exist
1609  if (retTypeName.empty()) {
1610  // If we are here, it means that the type is not known to the interpreter.
1611  // We extract its name, forward declare it and add a meaningful comment.
1612  // This string will be jitted flawlessly. If the user later on uses the product of this define
1613  // and therefore an incomplete type, the interpreter will prompt an error and also display
1614  // the comment we nicely built which reminds the user about the absence of information about
1615  // this type in the interpreter.
1616  int errCode(0);
1617  retTypeName = TClassEdit::DemangleTypeIdName(typeid(RetType), errCode);
1618  retTypeNameFwdDecl = "class " + retTypeName + ";/* Did you forget to declare type " + retTypeName + " in the interpreter?*/";
1619  }
1620  const auto retTypeDeclaration = "namespace __tdf" + std::to_string(loopManager->GetID()) + " { " + retTypeNameFwdDecl + " using " +
1621  std::string(name) + "_type = " + retTypeName + "; }";
1622  gInterpreter->Declare(retTypeDeclaration.c_str());
1623 
1624  loopManager->Book(std::make_shared<NewCol_t>(name, std::move(expression), validColumnNames, loopManager.get()));
1625  loopManager->AddCustomColumnName(name);
1626  RInterface<Proxied> newInterface(fProxiedPtr, fImplWeakPtr, fValidCustomColumns, fBranchNames, fDataSource);
1627  newInterface.fValidCustomColumns.emplace_back(name);
1628  return newInterface;
1629  }
1630 
1631  // This overload is chosen when the callable passed to Define or DefineSlot returns void.
1632  // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
1633  // this way compilation of `Define` has no way to continue after throwing the error.
1634  template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
1635  typename std::enable_if<!std::is_convertible<F, std::string>::value &&
1636  !std::is_default_constructible<RetType>::value,
1639  {
1640  static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
1641  "Error in `Define`: type returned by expression is not default-constructible");
1642  return *this; // never reached
1643  }
1644 
1645  ////////////////////////////////////////////////////////////////////////////
1646  /// \brief Implementation of snapshot
1647  /// \param[in] treename The name of the TTree
1648  /// \param[in] filename The name of the TFile
1649  /// \param[in] columnList The list of names of the branches to be written
1650  /// The implementation exploits Foreach. The association of the addresses to
1651  /// the branches takes place at the first event. This is possible because
1652  /// since there are no copies, the address of the value passed by reference
1653  /// is the address pointing to the storage of the read/created object in/by
1654  /// the TTreeReaderValue/TemporaryBranch
1655  template <typename... ColumnTypes>
1657  const ColumnNames_t &columnList, const RSnapshotOptions &options)
1658  {
1659  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size());
1660 
1661  auto lm = GetLoopManager();
1662  const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
1663 
1664  if (fDataSource)
1665  RDFInternal::DefineDataSourceColumns(validCols, *lm, *fDataSource, std::index_sequence_for<ColumnTypes...>(),
1666  TTraits::TypeList<ColumnTypes...>());
1667 
1668  const std::string fullTreename(treename);
1669  // split name into directory and treename if needed
1670  const auto lastSlash = treename.rfind('/');
1671  std::string_view dirname = "";
1672  if (std::string_view::npos != lastSlash) {
1673  dirname = treename.substr(0, lastSlash);
1674  treename = treename.substr(lastSlash + 1, treename.size());
1675  }
1676 
1677  // add action node to functional graph and run event loop
1678  std::shared_ptr<RDFInternal::RActionBase> actionPtr;
1679  if (!ROOT::IsImplicitMTEnabled()) {
1680  // single-thread snapshot
1681  using Helper_t = RDFInternal::SnapshotHelper<ColumnTypes...>;
1682  using Action_t = RDFInternal::RAction<Helper_t, Proxied, TTraits::TypeList<ColumnTypes...>>;
1683  actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
1684  *fProxiedPtr));
1685  } else {
1686  // multi-thread snapshot
1687  using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>;
1688  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1689  actionPtr.reset(
1690  new Action_t(Helper_t(lm->GetNSlots(), filename, dirname, treename, validCols, columnList, options),
1691  validCols, *fProxiedPtr));
1692  }
1693 
1694  lm->Book(actionPtr);
1695 
1696  // create new RDF
1698  // Now we mimic a constructor for the RDataFrame. We cannot invoke it here
1699  // since this would introduce a cyclic headers dependency.
1700 
1701  // Keep these two statements separated to work-around an ABI incompatibility
1702  // between clang (and thus cling) and gcc in the way std::forward is handled.
1703  // See https://sft.its.cern.ch/jira/browse/ROOT-9236 for more detail.
1704  auto rlm_ptr = std::make_shared<RLoopManager>(nullptr, validCols);
1705  auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(rlm_ptr);
1706 
1707  auto chain = std::make_shared<TChain>(fullTreename.c_str());
1708  chain->Add(std::string(filename).c_str());
1709  snapshotRDF->fProxiedPtr->SetTree(chain);
1710 
1711  auto snapshotRDFResPtr = MakeResultPtr(snapshotRDF, lm, actionPtr.get());
1712  if (!options.fLazy) {
1713  *snapshotRDFResPtr;
1714  }
1715 
1716  return snapshotRDFResPtr;
1717  }
1718 
1719  ////////////////////////////////////////////////////////////////////////////
1720  /// \brief Implementation of cache
1721  template <typename... BranchTypes, std::size_t... S>
1723  {
1724 
1725  // Check at compile time that the columns types are copy constructible
1726  constexpr bool areCopyConstructible =
1727  RDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
1728  static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
1729 
1730  // We share bits and pieces with snapshot. De facto this is a snapshot
1731  // in memory!
1732  RDFInternal::CheckTypesAndPars(sizeof...(BranchTypes), columnList.size());
1733  if (fDataSource) {
1734  auto lm = GetLoopManager();
1735  RDFInternal::DefineDataSourceColumns(columnList, *lm, *fDataSource, s, TTraits::TypeList<BranchTypes...>());
1736  }
1737 
1738  auto colHolders = std::make_tuple(Take<BranchTypes>(columnList[S])...);
1739  auto ds = std::make_unique<RLazyDS<BranchTypes...>>(std::make_pair(columnList[S], std::get<S>(colHolders))...);
1740 
1741  RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnList));
1742 
1743  const std::vector<std::string> columnTypeNames = {RDFInternal::TypeID2TypeName(
1744  typeid(typename std::decay<decltype(std::get<S>(colHolders))>::type::Value_t))...}; // ... expands on S
1745 
1746  return cachedRDF;
1747  }
1748 
1749 protected:
1750  /// Get the RLoopManager if reachable. If not, throw.
1751  std::shared_ptr<RLoopManager> GetLoopManager()
1752  {
1753  auto df = fImplWeakPtr.lock();
1754  if (!df) {
1755  throw std::runtime_error("The main RDataFrame is not reachable: did it go out of scope?");
1756  }
1757  return df;
1758  }
1759 
1760  RInterface(const std::shared_ptr<Proxied> &proxied, const std::weak_ptr<RLoopManager> &impl,
1761  const ColumnNames_t &validColumns, const std::shared_ptr<const ColumnNames_t> &datasetColumns,
1762  RDataSource *ds)
1763  : fProxiedPtr(proxied), fImplWeakPtr(impl), fValidCustomColumns(validColumns),
1764  fDataSource(ds), fBranchNames(datasetColumns)
1765  {
1766  }
1767 
1768  const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
1769 
1770  /// Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,
1771  /// which is expensive in terms of runtime, is called at most once.
1772  ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
1773  {
1774  auto loopManager = GetLoopManager();
1775  auto tree = loopManager->GetTree();
1776  if (tree && !fBranchNames) {
1777  fBranchNames = std::make_shared<ColumnNames_t>(RDFInternal::GetBranchNames(*tree));
1778  }
1779  return RDFInternal::GetValidatedColumnNames(*loopManager, nColumns, columns,
1780  (tree ? *fBranchNames : ColumnNames_t{}),
1781  fValidCustomColumns, fDataSource);
1782  }
1783 
1784 };
1785 
1786 template <>
1788 {
1789  return "ROOT::Detail::RDF::RFilterBase";
1790 }
1791 
1792 template <>
1794 {
1795  return "ROOT::Detail::RDF::RLoopManager";
1796 }
1797 
1798 template <>
1800 {
1801  return "ROOT::Detail::RDF::RRangeBase";
1802 }
1803 
1804 template <>
1806 {
1807  return "ROOT::Detail::RDF::RJittedFilter";
1808 }
1809 
1810 } // end NS RDF
1811 
1812 } // end NS ROOT
1813 
1814 #endif // ROOT_RDF_INTERFACE
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Creates a custom column.
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
Definition: TypeTraits.hxx:205
std::string JitBuildAndBook(const ColumnNames_t &bl, const std::string &prevNodeTypename, void *prevNode, const std::type_info &art, const std::type_info &at, const void *rOnHeap, TTree *tree, const unsigned int nSlots, const ColumnNames_t &customColumns, RDataSource *ds, const std::shared_ptr< RActionBase *> *const actionPtrPtr, unsigned int namespaceID)
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end)
RResultPtr< typename Helper::Result_t > Book(Helper &&h, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
void BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds)
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:27
const std::weak_ptr< RLoopManager > fImplWeakPtr
Weak pointer to the RLoopManager at the root of the graph.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
void Run(unsigned int slot, Long64_t entry) final
Definition: RDFNodes.hxx:390
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
A struct which stores the parameters of a TProfile2D.
RResultPtr< T > Fill(T &&model, const ColumnNames_t &bl)
Return an object of type T on which T::Fill will be called once per event (lazy action) ...
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset&#39;s column names.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
bool IsInternalColumn(std::string_view colName)
A struct which stores the parameters of a TH3D.
std::shared_ptr< RLoopManager > GetLoopManager()
Get the RLoopManager if reachable. If not, throw.
typename TakeFirstParameter< T >::type TakeFirstParameter_t
Definition: TypeTraits.hxx:191
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1193
double T(double x)
Definition: ChebyshevPol.h:34
ColumnNames_t ConvertRegexToColumns(std::string_view columnNameRegexp, std::string_view callerName)
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
Regular expression class.
Definition: TRegexp.h:31
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
#define f(i)
Definition: RSha256.hxx:104
static std::string GetNodeTypeName()
Return string containing fully qualified type name of the node pointed by fProxied.
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action) ...
#define gInterpreter
Definition: TInterpreter.h:527
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
Profile Histogram.
Definition: TProfile.h:32
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:82
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... > s)
Implementation of cache.
ColumnNames_t fValidCustomColumns
Names of columns Defined for this branch of the functional graph.
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
Implementation of snapshot.
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
A struct which stores the parameters of a TH2D.
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, const std::shared_ptr< RLoopManager > &df, ROOT::Internal::RDF::RActionBase *actionPtr)
Create a RResultPtr and set its pointer to the corresponding RAction This overload is invoked by non-...
Definition: RResultPtr.hxx:353
A collection of options to steer the creation of the dataset on file.
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry...
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
std::shared_ptr<::TH3D > GetHistogram() const
RVec< T > Filter(const RVec< T > &v, F &&f)
Create a new collection with the elements passing the filter expressed by the predicate.
Definition: RVec.hxx:636
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RooArgSet S(const RooAbsArg &v1)
#define F(x, y, z)
RResultPtr<::TH1D > Histo1D(std::string_view vName)
std::string printValue(const TDatime *val)
Print a TDatime at the prompt.
Definition: TDatime.cxx:514
ROOT::R::TRInterface & r
Definition: Object.C:4
3-D histogram with a double per channel (see TH1 documentation)}
Definition: TH3.h:304
std::shared_ptr< RFilterBase > UpcastNode(const std::shared_ptr< RFilterBase > ptr)
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
std::shared_ptr<::TH2D > GetHistogram() const
const std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
The public interface to the RDataFrame federation of classes.
void BookFilterJit(RJittedFilter *jittedFilter, void *prevNode, std::string_view prevNodeTypeName, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const ColumnNames_t &customCols, TTree *tree, RDataSource *ds, unsigned int namespaceID)
std::shared_ptr<::TProfile > GetProfile() const
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
std::shared_ptr<::TProfile2D > GetProfile() const
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot...
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &datasetColumns, const ColumnNames_t &validCustomColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action) ...
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
#define h(i)
Definition: RSha256.hxx:106
std::enable_if<!std::is_convertible< F, std::string >::value &&!std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view, F, const ColumnNames_t &)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action) ...
A struct which stores the parameters of a TH1D.
ROOT&#39;s RDataFrame offers a high level interface for analyses of data stored in TTrees, CSV&#39;s and other data formats.
Definition: RDataFrame.hxx:42
RInterface(const std::shared_ptr< Proxied > &proxied, const std::weak_ptr< RLoopManager > &impl, const ColumnNames_t &validColumns, const std::shared_ptr< const ColumnNames_t > &datasetColumns, RDataSource *ds)
std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree, RDataSource *ds, bool isCustomColumn, bool vector2tvec)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:184
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
Ssiz_t Index(const TString &str, Ssiz_t *len, Ssiz_t start=0) const
Find the first occurrence of the regexp in string and return the position, or -1 if there is no match...
Definition: TRegexp.cxx:209
TText * text
A struct which stores the parameters of a TProfile.
int type
Definition: TGX11.cxx:120
unsigned long long ULong64_t
Definition: RtypesCore.h:70
Print a TSeq at the prompt:
Definition: TDatime.h:115
static RooMathCoreReg dummy
basic_string_view< char > string_view
Definition: RStringView.hxx:35
std::enable_if< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action)
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
static constexpr double s
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const ColumnNames_t &dataSourceColumns)
make_index_sequence< sizeof...(_Tp)> index_sequence_for
Profile2D histograms are used to display the mean value of Z and its RMS for each cell in X...
Definition: TProfile2D.h:27
Extract types from the signature of a callable object. See CallableTraits.
Definition: TypeTraits.hxx:38
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot...
Binding & operator=(OUT(*fun)(void))
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames, which is expensive in terms of runtime, is called at most once.
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:607
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>
const Int_t kError
Definition: TError.h:39
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default) ...
#define c(i)
Definition: RSha256.hxx:101
Definition: tree.py:1
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
std::shared_ptr<::TH1D > GetHistogram() const
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Definition: RDataSource.hxx:91
std::shared_ptr< const ColumnNames_t > fBranchNames
Cache of the chain columns names.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
make_integer_sequence< size_t, _Np > make_index_sequence
char name[80]
Definition: TGX11.cxx:109
ColumnNames_t GetBranchNames(TTree &t)
Get all the branches names, including the ones of the friend trees.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action) ...
2-D histogram with a double per channel (see TH1 documentation)}
Definition: TH2.h:291