Logo ROOT  
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RDF_TINTERFACE
12 #define ROOT_RDF_TINTERFACE
13 
14 #include "ROOT/RDataSource.hxx"
17 #include "ROOT/RDF/HistoModels.hxx"
19 #include "ROOT/RDF/RRange.hxx"
20 #include "ROOT/RDF/Utils.hxx"
22 #include "ROOT/RDF/RLazyDSImpl.hxx"
23 #include "ROOT/RResultPtr.hxx"
25 #include "ROOT/RStringView.hxx"
26 #include "ROOT/TypeTraits.hxx"
27 #include "RtypesCore.h" // for ULong64_t
28 #include "TH1.h" // For Histo actions
29 #include "TH2.h" // For Histo actions
30 #include "TH3.h" // For Histo actions
31 #include "TProfile.h"
32 #include "TProfile2D.h"
33 #include "TStatistic.h"
34 
35 #include <algorithm>
36 #include <cstddef>
37 #include <initializer_list>
38 #include <limits>
39 #include <memory>
40 #include <sstream>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits> // is_same, enable_if
44 #include <typeinfo>
45 #include <vector>
46 
47 class TGraph;
48 
49 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
50 namespace ROOT {
51 void DisableImplicitMT();
52 bool IsImplicitMTEnabled();
53 void EnableImplicitMT(UInt_t numthreads);
54 class RDataFrame;
55 namespace Internal {
56 namespace RDF {
57 class GraphCreatorHelper; // NOTE(review): RInterface befriends RDFInternal::GraphDrawing::GraphCreatorHelper, i.e. a class one namespace level deeper than this declaration (assuming RDFInternal aliases ROOT::Internal::RDF) - confirm this forward declaration is still needed
58 }
59 } // namespace Internal
60 } // namespace ROOT
61 namespace cling {
62 std::string printValue(ROOT::RDataFrame *tdf); // declared before RInterface so that the class can name it in a friend declaration
63 }
64 
65 namespace ROOT {
66 namespace RDF {
67 namespace RDFDetail = ROOT::Detail::RDF;
69 namespace TTraits = ROOT::TypeTraits;
70 
71 template <typename Proxied, typename DataSource>
72 class RInterface;
73 
74 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
75 
76 // clang-format off
77 /**
78  * \class ROOT::RDF::RInterface
79  * \ingroup dataframe
80  * \brief The public interface to the RDataFrame federation of classes
81  * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
82  * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
83  *
84  * The documentation of each method features a one liner illustrating how to use the method, for example showing how
85  * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
86  */
87 // clang-format on
88 template <typename Proxied, typename DataSource = void>
89 class RInterface {
90  using DS_t = DataSource;
91  using ColumnNames_t = RDFDetail::ColumnNames_t;
95  friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
96  friend class RDFInternal::GraphDrawing::GraphCreatorHelper;
97 
98  template <typename T, typename W>
99  friend class RInterface;
100 
101  std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
102  ///< The RLoopManager at the root of this computation graph. Never null.
104  /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
106 
107  /// Contains the custom columns defined up to this node.
109 
110 public:
111  ////////////////////////////////////////////////////////////////////////////
112  /// \brief Copy-assignment operator for RInterface.
113  RInterface &operator=(const RInterface &) = default;
114 
115  ////////////////////////////////////////////////////////////////////////////
116  /// \brief Copy-ctor for RInterface.
117  RInterface(const RInterface &) = default;
118 
119  ////////////////////////////////////////////////////////////////////////////
120  /// \brief Move-ctor for RInterface.
121  RInterface(RInterface &&) = default;
122 
123  ////////////////////////////////////////////////////////////////////////////
124  /// \brief Only enabled when building a RInterface<RLoopManager>
125  template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0> // SFINAE: this constructor only exists when Proxied is RLoopManager
126  RInterface(const std::shared_ptr<Proxied> &proxied)
127  : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource()) // the proxied node doubles as the loop manager; `proxied` is dereferenced here, so it must not be null
128  {
130  }
131 
132  ////////////////////////////////////////////////////////////////////////////
133  /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
134  /// Different RDataFrame methods return different C++ types. All nodes, however,
135  /// can be cast to this common type at the cost of a small performance penalty.
136  /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
137  /// around via (non-template, C++11) helper functions.
138  /// Example usage:
139  /// ~~~{.cpp}
140  /// // a function that conditionally adds a Range to a RDataFrame node.
141  /// RNode MaybeAddRange(RNode df, bool mustAddRange)
142  /// {
143  /// return mustAddRange ? df.Range(1) : df;
144  /// }
145  /// // use as :
146  /// ROOT::RDataFrame df(10);
147  /// auto maybeRanged = MaybeAddRange(df, true);
148  /// ~~~
149  /// Note that it is not a problem to pass RNode's by value.
150  operator RNode() const
151  { // Only the node pointer is upcast (to RNodeBase); loop manager, custom columns and data source are passed through unchanged.
152  return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fCustomColumns,
153  fDataSource);
154  }
155 
156  ////////////////////////////////////////////////////////////////////////////
157  /// \brief Append a filter to the call graph.
158  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
159  /// signalling whether the event has passed the selection (true) or not (false).
160  /// \param[in] columns Names of the columns/branches in input to the filter function.
161  /// \param[in] name Optional name of this filter. See `Report`.
162  /// \return the filter node of the computation graph.
163  ///
164  /// Append a filter node at the point of the call graph corresponding to the
165  /// object this method is called on.
166  /// The callable `f` should not have side-effects (e.g. modification of an
167  /// external or static variable) to ensure correct results when implicit
168  /// multi-threading is active.
169  ///
170  /// RDataFrame only evaluates filters when necessary: if multiple filters
171  /// are chained one after another, they are executed in order and the first
172  /// one returning false causes the event to be discarded.
173  /// Even if multiple actions or transformations depend on the same filter,
174  /// it is executed once per entry. If its result is requested more than
175  /// once, the cached result is served.
176  ///
177  /// ### Example usage:
178  /// ~~~{.cpp}
179  /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
180  /// auto filtered = df.Filter(myCut, {"x", "y"});
181  ///
182  /// // String: it must contain valid C++ except that column names can be used instead of variable names
183  /// auto filtered = df.Filter("x*y > 0");
184  /// ~~~
185  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0> // disabled when F converts to std::string, so that string expressions select the jitted overload
187  Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
188  {
189  RDFInternal::CheckFilter(f);
190  using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types; // parameter types of the callable
191  constexpr auto nColumns = ColTypes_t::list_size; // number of input columns requested = number of parameters of `f`
192  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
193  const auto newColumns =
194  CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
195 
196  using F_t = RDFDetail::RFilter<F, Proxied>;
197 
198  auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, newColumns, name); // the new node receives this node (fProxiedPtr), presumably as its upstream node
199  fLoopManager->Book(filterPtr.get()); // the loop manager is only given a raw pointer; ownership stays with filterPtr
200  return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, newColumns, fDataSource); // the returned interface wraps the new filter node and carries newColumns
201  }
202 
203  ////////////////////////////////////////////////////////////////////////////
204  /// \brief Append a filter to the call graph.
205  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
206  /// signalling whether the event has passed the selection (true) or not (false).
207  /// \param[in] name Optional name of this filter. See `Report`.
208  /// \return the filter node of the computation graph.
209  ///
210  /// Refer to the first overload of this method for the full documentation.
211  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
213  {
214  // The SFINAE constraint removes this template from overload resolution when F is convertible to
215  // std::string, so that a call passing two strings selects the string-based overload instead.
216  return Filter(f, {}, name); // empty column list: delegate to the overload that also takes column names
217  }
218 
219  ////////////////////////////////////////////////////////////////////////////
220  /// \brief Append a filter to the call graph.
221  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
222  /// signalling whether the event has passed the selection (true) or not (false).
223  /// \param[in] columns Names of the columns/branches in input to the filter function.
224  /// \return the filter node of the computation graph.
225  ///
226  /// Refer to the first overload of this method for the full documentation.
227  template <typename F>
228  RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
229  { // Overload taking the column names as a std::initializer_list; it forwards to the ColumnNames_t overload.
230  return Filter(std::move(f), ColumnNames_t{columns}); // `f` is a by-value copy owned by this function: move it on instead of copying it a second time
231  }
232 
233  ////////////////////////////////////////////////////////////////////////////
234  /// \brief Append a filter to the call graph.
235  /// \param[in] expression The filter expression in C++
236  /// \param[in] name Optional name of this filter. See `Report`.
237  /// \return the filter node of the computation graph.
238  ///
239  /// The expression is just-in-time compiled and used to filter entries. It must
240  /// be valid C++ syntax in which variable names are substituted with the names
241  /// of branches/columns.
242  ///
243  /// ### Example usage:
244  /// ~~~{.cpp}
245  /// auto filtered_df = df.Filter("myCollection.size() > 3");
246  /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
247  /// ~~~
249  {
250  // upcastNodeOnHeap is a heap-allocated pointer to a smart pointer; it is deleted by the jitted call to JitFilterHelper
251  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
252  using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type; // node type held by the smart pointer
253  RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource); // NOTE(review): not referenced by any line of this function visible here - confirm it is still needed
254  const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
255 
256  RDFInternal::BookFilterJit(jittedFilter, upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
258 
259  fLoopManager->Book(jittedFilter.get()); // the loop manager is only given a raw pointer to the jitted filter
261  fDataSource);
262  }
263 
264  // clang-format off
265  ////////////////////////////////////////////////////////////////////////////
266  /// \brief Creates a custom column
267  /// \param[in] name The name of the custom column.
268  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
269  /// \param[in] columns Names of the columns/branches in input to the producer function.
270  /// \return the first node of the computation graph for which the new quantity is defined.
271  ///
272  /// Create a custom column that will be visible from all subsequent nodes
273  /// of the functional chain. The `expression` is only evaluated for entries that pass
274  /// all the preceding filters.
275  /// A new variable is created called `name`, accessible as if it was contained
276  /// in the dataset from subsequent transformations/actions.
277  ///
278  /// Use cases include:
279  /// * caching the results of complex calculations for easy and efficient multiple access
280  /// * extraction of quantities of interest from complex objects
281  ///
282  /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
283  ///
284  /// ### Example usage:
285  /// ~~~{.cpp}
286  /// // assuming a function with signature:
287  /// double myComplexCalculation(const RVec<float> &muon_pts);
288  /// // we can pass it directly to Define
289  /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
290  /// // alternatively, we can pass the body of the function as a string, as in Filter:
291  /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
292  /// ~~~
293  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0> // disabled when F converts to std::string
295  {
296  return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns); // `None` tag: presumably no slot/entry argument is passed to the callable (DefineImpl is not shown here)
297  }
298  // clang-format on
299 
300  // clang-format off
301  ////////////////////////////////////////////////////////////////////////////
302  /// \brief Creates a custom column with a value dependent on the processing slot.
303  /// \param[in] name The name of the custom column.
304  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
305  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
306  /// \return the first node of the computation graph for which the new quantity is defined.
307  ///
308  /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
309  /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
310  /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
311  /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
312  /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
313  ///
314  /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
315  /// ~~~{.cpp}
316  /// int function(unsigned int, double, double);
317  /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
318  /// df.DefineSlot("x", function, {"column1", "column2"})
319  /// ~~~
320  ///
321  /// See Define for more information.
322  template <typename F>
324  {
325  return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns); // `Slot` tag selects the variant whose callable takes the slot number as first argument, as documented above
326  }
327  // clang-format on
328 
329  // clang-format off
330  ////////////////////////////////////////////////////////////////////////////
331  /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
332  /// \param[in] name The name of the custom column.
333  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
334  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
335  /// \return the first node of the computation graph for which the new quantity is defined.
336  ///
337  /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
338  /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
339  /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
340  /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
341  /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
342  /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
343  ///
344  /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
345  /// ~~~{.cpp}
346  /// int function(unsigned int, ULong64_t, double, double);
347  /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
348  /// DefineSlotEntry("x", function, {"column1", "column2"})
349  /// ~~~
350  ///
351  /// See Define for more information.
352  template <typename F>
354  {
355  return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns); // `SlotAndEntry` tag selects the variant whose callable takes slot number and entry number first, as documented above
356  }
357  // clang-format on
358 
359  ////////////////////////////////////////////////////////////////////////////
360  /// \brief Creates a custom column
361  /// \param[in] name The name of the custom column.
362  /// \param[in] expression An expression in C++ which represents the temporary value
363  /// \return the first node of the computation graph for which the new quantity is defined.
364  ///
365  /// The expression is just-in-time compiled and used to produce the column entries.
366  /// It must be valid C++ syntax in which variable names are substituted with the names
367  /// of branches/columns.
368  ///
369  /// Refer to the first overload of this method for the full documentation.
371  {
372  // this check must be done before jitting lest we throw exceptions in jitted code
376 
377  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); // NOTE(review): no matching delete is visible here - presumably released by the jitted code, as in the string overload of Filter; confirm
378  auto jittedCustomColumn = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fCustomColumns,
379  fLoopManager->GetBranchNames(), upcastNodeOnHeap);
380 
382  newCols.AddName(name);
383  newCols.AddColumn(jittedCustomColumn, name);
384 
385  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource); // same graph node as this interface (fProxiedPtr); only the set of known columns differs
386 
387  return newInterface;
388  }
389 
390  ////////////////////////////////////////////////////////////////////////////
391  /// \brief Allow referring to a column by a different name
392  /// \param[in] alias name of the column alias
393  /// \param[in] columnName name of the column to be aliased
394  /// \return the first node of the computation graph for which the alias is available.
395  ///
396  /// Aliasing an alias is supported.
397  ///
398  /// ### Example usage:
399  /// ~~~{.cpp}
400  /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
401  /// ~~~
403  {
404  // The symmetry with Define is clear. We want to:
405  // - Create globally the alias and return this very node, unchanged
406  // - Make aliases accessible based on chains and not globally
407 
408  // Column names provided by the data source, or an empty list when there is no data source
409  auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
410 
411  // If the alias name is a column name, there is a problem
413  fLoopManager->GetAliasMap(), dsColumnNames);
414 
415  const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0]; // validate the single aliased column name and take the one resulting entry
416 
417  fLoopManager->AddColumnAlias(std::string(alias), validColumnName); // the alias itself is recorded on the loop manager
418 
420 
421  newCols.AddName(alias);
422  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource); // same graph node as this interface (fProxiedPtr); only the set of known names differs
423 
424  return newInterface;
425  }
426 
427  ////////////////////////////////////////////////////////////////////////////
428  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
429  /// \tparam ColumnTypes variadic list of branch/column types.
430  /// \param[in] treename The name of the output TTree.
431  /// \param[in] filename The name of the output TFile.
432  /// \param[in] columnList The list of names of the columns/branches to be written.
433  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
434  /// \return a `RDataFrame` that wraps the snapshotted dataset.
435  ///
436  /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
437  /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
438  /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
439  /// written out and it appears before the array in the columnList.
440  ///
441  /// ### Example invocations:
442  ///
443  /// ~~~{.cpp}
444  /// // without specifying template parameters (column types automatically deduced)
445  /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
446  ///
447  /// // specifying template parameters ("x" is `int`, "y" is `float`)
448  /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
449  /// ~~~
450  ///
451  /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
452  /// `RSnapshotOptions`:
453  /// ~~~{.cpp}
454  /// RSnapshotOptions opts;
455  /// opts.fLazy = true;
456  /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
457  /// ~~~
458  template <typename... ColumnTypes>
460  Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
461  const RSnapshotOptions &options = RSnapshotOptions())
462  {
463  return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options); // all arguments and the explicit column types are forwarded unchanged to SnapshotImpl
464  }
465 
466  ////////////////////////////////////////////////////////////////////////////
467  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
468  /// \param[in] treename The name of the output TTree.
469  /// \param[in] filename The name of the output TFile.
470  /// \param[in] columnList The list of names of the columns/branches to be written.
471  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
472  /// \return a `RDataFrame` that wraps the snapshotted dataset.
473  ///
474  /// This function returns a `RDataFrame` built with the output tree as a source.
475  /// The types of the columns are automatically inferred and do not need to be specified.
476  ///
477  /// See above for a more complete description and example usages.
479  const ColumnNames_t &columnList,
480  const RSnapshotOptions &options = RSnapshotOptions())
481  {
482  // Early return: if the list of columns is empty, just return an empty RDF
483  // If we proceed, the jitted call will not compile!
484  if (columnList.empty()) {
485  auto nEntries = *this->Count(); // NOTE(review): the Count() result is dereferenced immediately, and `options` is not consulted on this path - confirm this is intended for lazy snapshots
486  auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries));
487  return MakeResultPtr(snapshotRDF, *fLoopManager, nullptr);
488  }
489  std::stringstream snapCall;
490  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
491  RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
493 
494  // build a string equivalent to
495  // "resPtr = (RInterface<nodetype*>*)(this)->Snapshot<Ts...>(args...)"
497  snapCall << "*reinterpret_cast<ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>>*>("
498  << RDFInternal::PrettyPrintAddr(&resPtr)
499  << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
500  << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Snapshot<";
501 
502  const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
503  const auto colTypes = GetValidatedArgTypes(validColumnNames, fCustomColumns, fLoopManager->GetTree(), fDataSource,
504  "Snapshot", /*vector2rvec=*/false);
505 
506  for (auto &colType : colTypes)
507  snapCall << colType << ", ";
508  if (!colTypes.empty())
509  snapCall.seekp(-2, snapCall.cur); // step back over the trailing ", " so that the next write overwrites it
510  snapCall << ">(\"" << treename << "\", \"" << filename << "\", " // NOTE(review): tree and file names are pasted into the generated code without escaping
511  << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
512  << RDFInternal::PrettyPrintAddr(&columnList) << "),"
513  << "*reinterpret_cast<ROOT::RDF::RSnapshotOptions*>(" << RDFInternal::PrettyPrintAddr(&options) << "));";
514  // jit snapCall; the generated statement reaches resPtr, upcastInterface, columnList and options through the addresses embedded above
515  RDFInternal::InterpreterCalc(snapCall.str(), "Snapshot");
516  return resPtr;
517  }
518 
519  // clang-format off
520  ////////////////////////////////////////////////////////////////////////////
521  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
522  /// \param[in] treename The name of the output TTree.
523  /// \param[in] filename The name of the output TFile.
524  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
525  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
526  /// \return a `RDataFrame` that wraps the snapshotted dataset.
527  ///
528  /// This function returns a `RDataFrame` built with the output tree as a source.
529  /// The types of the columns are automatically inferred and do not need to be specified.
530  ///
531  /// See above for a more complete description and example usages.
533  std::string_view columnNameRegexp = "",
534  const RSnapshotOptions &options = RSnapshotOptions())
535  {
536  auto selectedColumns = RDFInternal::ConvertRegexToColumns(fCustomColumns,
538  fDataSource,
539  columnNameRegexp,
540  "Snapshot");
541  return Snapshot(treename, filename, selectedColumns, options); // delegate with the columns selected through the regular expression
542  }
543  // clang-format on
544 
545  // clang-format off
546  ////////////////////////////////////////////////////////////////////////////
547  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
548  /// \param[in] treename The name of the output TTree.
549  /// \param[in] filename The name of the output TFile.
550  /// \param[in] columnList The list of names of the columns/branches to be written.
551  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
552  /// \return a `RDataFrame` that wraps the snapshotted dataset.
553  ///
554  /// This function returns a `RDataFrame` built with the output tree as a source.
555  /// The types of the columns are automatically inferred and do not need to be specified.
556  ///
557  /// See above for a more complete description and example usages.
559  std::initializer_list<std::string> columnList,
560  const RSnapshotOptions &options = RSnapshotOptions())
561  {
562  ColumnNames_t selectedColumns(columnList); // copy the braced list into a ColumnNames_t
563  return Snapshot(treename, filename, selectedColumns, options); // delegate with the named ColumnNames_t object
564  }
565  // clang-format on
566 
567  ////////////////////////////////////////////////////////////////////////////
568  /// \brief Save selected columns in memory
569  /// \tparam ColumnTypes variadic list of branch/column types.
570  /// \param[in] columnList columns to be cached in memory.
571  /// \return a `RDataFrame` that wraps the cached dataset.
572  ///
573  /// This action returns a new `RDataFrame` object, completely detached from
574  /// the originating `RDataFrame`. The new dataframe only contains the cached
575  /// columns and stores their content in memory for fast, zero-copy subsequent access.
576  ///
577  /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
578  /// fits in memory and that will be accessed many times.
579  ///
580  /// ### Example usage:
581  ///
582  /// **Types and columns specified:**
583  /// ~~~{.cpp}
584  /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
585  /// ~~~
586  ///
587  /// **Types inferred and columns specified (this invocation relies on jitting):**
588  /// ~~~{.cpp}
589  /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
590  /// ~~~
591  ///
592  /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
593  /// ~~~{.cpp}
594  /// auto cache_all_cols_df = df.Cache(myRegexp);
595  /// ~~~
596  template <typename... ColumnTypes>
598  {
599  auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>(); // one index per explicitly specified column type
600  return CacheImpl<ColumnTypes...>(columnList, staticSeq);
601  }
602 
603  ////////////////////////////////////////////////////////////////////////////
604  /// \brief Save selected columns in memory
605  /// \param[in] columnList columns to be cached in memory
606  /// \return a `RDataFrame` that wraps the cached dataset.
607  ///
608  /// See the previous overloads for more information.
610  {
611  // Early return: if the list of columns is empty, just return an empty RDF
612  // If we proceed, the jitted call will not compile!
613  if (columnList.empty()) {
614  auto nEntries = *this->Count(); // the empty dataframe is built with as many entries as Count() reports for this node
615  RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
616  return emptyRDF;
617  }
618 
619  std::stringstream cacheCall;
620  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
621  RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
623  // build a string equivalent to
624  // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
625  RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0)); // placeholder built with 0 entries; the jitted statement assigns the real result to it through its address
626  cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
627  << RDFInternal::PrettyPrintAddr(&resRDF)
628  << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
629  << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
630 
631  const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
632  const auto colTypes = GetValidatedArgTypes(validColumnNames, fCustomColumns, fLoopManager->GetTree(), fDataSource,
633  "Cache", /*vector2rvec=*/false);
634  for (const auto &colType : colTypes)
635  cacheCall << colType << ", ";
636  if (!columnList.empty()) // always true here: empty lists returned early above. NOTE(review): the jitted Snapshot overload tests colTypes.empty() at this point instead
637  cacheCall.seekp(-2, cacheCall.cur); // step back over the trailing ", " so that the next write overwrites it
638  cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
639  << RDFInternal::PrettyPrintAddr(&columnList) << "));";
640  // jit cacheCall; the generated statement reaches resRDF, upcastInterface and columnList through the addresses embedded above
641  RDFInternal::InterpreterCalc(cacheCall.str(), "Cache");
642  return resRDF;
643  }
644 
645  ////////////////////////////////////////////////////////////////////////////
646  /// \brief Save selected columns in memory
647  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
648  /// \return a `RDataFrame` that wraps the cached dataset.
649  ///
650  /// The existing columns are matched against the regular expression. If the string provided
651  /// is empty, all columns are selected. See the previous overloads for more information.
653  {
654 
656  columnNameRegexp, "Cache");
657  return Cache(selectedColumns); // delegate with the columns selected through the regular expression
658  }
659 
660  ////////////////////////////////////////////////////////////////////////////
661  /// \brief Save selected columns in memory
662  /// \param[in] columnList columns to be cached in memory.
663  /// \return a `RDataFrame` that wraps the cached dataset.
664  ///
665  /// See the previous overloads for more information.
666  RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
667  {
668  ColumnNames_t selectedColumns(columnList);
669  return Cache(selectedColumns);
670  }
671 
672  // clang-format off
673  ////////////////////////////////////////////////////////////////////////////
674  /// \brief Creates a node that filters entries based on range: [begin, end)
675  /// \param[in] begin Initial entry number considered for this range.
676  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
677  /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
678  /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
679  ///
680  /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
681  /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
682  ///
683  /// ### Example usage:
684  /// ~~~{.cpp}
685  /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
686  /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
687  /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
688  /// ~~~
689  // clang-format on
690  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
691  {
692  // check invariants
693  if (stride == 0 || (end != 0 && end < begin))
694  throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
695  CheckIMTDisabled("Range");
696 
698  auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
699  fLoopManager->Book(rangePtr.get());
701  return tdf_r;
702  }
703 
704  // clang-format off
705  ////////////////////////////////////////////////////////////////////////////
706  /// \brief Creates a node that filters entries based on range
707  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
708  /// \return a node of the computation graph for which the range is defined.
709  ///
710  /// See the other Range overload for a detailed description.
711  // clang-format on
712  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
713 
714  // clang-format off
715  ////////////////////////////////////////////////////////////////////////////
716  /// \brief Execute a user-defined function on each entry (*instant action*)
717  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
718  /// \param[in] columns Names of the columns/branches in input to the user function.
719  ///
720  /// The callable `f` is invoked once per entry. This is an *instant action*:
721  /// upon invocation, an event loop as well as execution of all scheduled actions
722  /// is triggered.
723  /// Users are responsible for the thread-safety of this callable when executing
724  /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
725  ///
726  /// ### Example usage:
727  /// ~~~{.cpp}
728  /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
729  /// ~~~
730  // clang-format on
731  template <typename F>
732  void Foreach(F f, const ColumnNames_t &columns = {})
733  {
734  using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
735  using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
736  ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
737  }
738 
739  // clang-format off
740  ////////////////////////////////////////////////////////////////////////////
741  /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
742  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
743  /// \param[in] columns Names of the columns/branches in input to the user function.
744  ///
745  /// Same as `Foreach`, but the user-defined function takes an extra
746  /// `unsigned int` as its first parameter, the *processing slot index*.
747  /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
748  /// for each thread of execution.
749  /// This is meant as a helper in writing thread-safe `Foreach`
750  /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
751  /// The user-defined processing callable is able to follow different
752  /// *streams of processing* indexed by the first parameter.
753  /// `ForeachSlot` works just as well with single-thread execution: in that
754  /// case `slot` will always be `0`.
755  ///
756  /// ### Example usage:
757  /// ~~~{.cpp}
758  /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
759  /// ~~~
760  // clang-format on
761  template <typename F>
762  void ForeachSlot(F f, const ColumnNames_t &columns = {})
763  {
764  using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
765  constexpr auto nColumns = ColTypes_t::list_size;
766 
767  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
768 
769  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
770 
771  using Helper_t = RDFInternal::ForeachSlotHelper<F>;
773 
774  auto action =
775  std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, std::move(newColumns));
776  fLoopManager->Book(action.get());
777 
778  fLoopManager->Run();
779  }
780 
781  // clang-format off
782  ////////////////////////////////////////////////////////////////////////////
783  /// \brief Execute a user-defined reduce operation on the values of a column.
784  /// \tparam F The type of the reduce callable. Automatically deduced.
785  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
786  /// \param[in] f A callable with signature `T(T,T)`
787  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
788  /// \return the reduced quantity wrapped in a `RResultPtr`.
789  ///
790  /// A reduction takes two values of a column and merges them into one (e.g.
791  /// by summing them, taking the maximum, etc). This action performs the
792  /// specified reduction operation on all processed column values, returning
793  /// a single value of the same type. The callable f must satisfy the general
794  /// requirements of a *processing function* besides having signature `T(T,T)`
795  /// where `T` is the type of column columnName.
796  ///
797  /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
798  /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
799  /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
800  /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
801  /// overload.
802  ///
803  /// ### Example usage:
804  /// ~~~{.cpp}
805  /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
806  /// ~~~
807  ///
808  /// This action is *lazy*: upon invocation of this method the calculation is
809  /// booked but not executed. See RResultPtr documentation.
810  // clang-format on
811  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
813  {
814  static_assert(
815  std::is_default_constructible<T>::value,
816  "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
817  return Reduce(std::move(f), columnName, T());
818  }
819 
820  ////////////////////////////////////////////////////////////////////////////
821  /// \brief Execute a user-defined reduce operation on the values of a column.
822  /// \tparam F The type of the reduce callable. Automatically deduced.
823  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
824  /// \param[in] f A callable with signature `T(T,T)`
825  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
826  /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
827  /// \return the reduced quantity wrapped in a `RResultPtr`.
828  ///
829  /// ### Example usage:
830  /// ~~~{.cpp}
831  /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
832  /// ~~~
833  /// See the description of the first Reduce overload for more information.
834  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
835  RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
836  {
837  return Aggregate(f, f, columnName, redIdentity);
838  }
839 
840  ////////////////////////////////////////////////////////////////////////////
841  /// \brief Return the number of entries processed (*lazy action*)
842  /// \return the number of entries wrapped in a `RResultPtr`.
843  ///
844  /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
845  /// This action is *lazy*: upon invocation of this method the calculation is
846  /// booked but not executed. See RResultPtr documentation.
847  ///
848  /// ### Example usage:
849  /// ~~~{.cpp}
850  /// auto nEntriesAfterCuts = myFilteredDf.Count();
851  /// ~~~
852  ///
854  {
855  const auto nSlots = fLoopManager->GetNSlots();
856  auto cSPtr = std::make_shared<ULong64_t>(0);
857  using Helper_t = RDFInternal::CountHelper;
859  auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
861  fLoopManager->Book(action.get());
862  return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
863  }
864 
865  ////////////////////////////////////////////////////////////////////////////
866  /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
867  /// \tparam T The type of the column.
868  /// \tparam COLL The type of collection used to store the values.
869  /// \param[in] column The name of the column to collect the values of.
870  /// \return the content of the selected column wrapped in a `RResultPtr`.
871  ///
872  /// The collection type to be specified for C-style array columns is `RVec<T>`:
873  /// in this case the returned collection is a `std::vector<RVec<T>>`.
874  /// ### Example usage:
875  /// ~~~{.cpp}
876  /// // In this case intCol is a std::vector<int>
877  /// auto intCol = rdf.Take<int>("integerColumn");
878  /// // Same content as above but in this case taken as a RVec<int>
879  /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
880  /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
881  /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
882  /// ~~~
883  /// This action is *lazy*: upon invocation of this method the calculation is
884  /// booked but not executed. See RResultPtr documentation.
885  template <typename T, typename COLL = std::vector<T>>
887  {
888  const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
889 
890  const auto validColumnNames = GetValidatedColumnNames(1, columns);
891 
892  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<1>(), TTraits::TypeList<T>());
893 
894  using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
896  auto valuesPtr = std::make_shared<COLL>();
897  const auto nSlots = fLoopManager->GetNSlots();
898 
899  auto action =
900  std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, std::move(newColumns));
901  fLoopManager->Book(action.get());
902  return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
903  }
904 
905  ////////////////////////////////////////////////////////////////////////////
906  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
907  /// \tparam V The type of the column used to fill the histogram.
908  /// \param[in] model The returned histogram will be constructed using this as a model.
909  /// \param[in] vName The name of the column that will fill the histogram.
910  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
911  ///
912  /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
913  /// is filled with each one of the elements of the container. In case multiple columns of container type
914  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
915  /// possibly different lengths between events).
916  /// This action is *lazy*: upon invocation of this method the calculation is
917  /// booked but not executed. See RResultPtr documentation.
918  ///
919  /// ### Example usage:
920  /// ~~~{.cpp}
921  /// // Deduce column type (this invocation needs jitting internally)
922  /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
923  /// // Explicit column type
924  /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
925  /// ~~~
926  ///
927  template <typename V = RDFDetail::RInferredType>
928  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
929  {
930  const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
931 
932  const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
933 
934  std::shared_ptr<::TH1D> h(nullptr);
935  {
936  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
937  h = model.GetHistogram();
938  h->SetDirectory(nullptr);
939  }
940 
941  if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
942  RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
943  return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h);
944  }
945 
946  ////////////////////////////////////////////////////////////////////////////
947  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
948  /// \tparam V The type of the column used to fill the histogram.
949  /// \param[in] vName The name of the column that will fill the histogram.
950  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
951  ///
952  /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
953  /// The "name" and "title" strings are built starting from the input column name.
954  /// See the description of the first Histo1D overload for more details.
955  ///
956  /// ### Example usage:
957  /// ~~~{.cpp}
958  /// // Deduce column type (this invocation needs jitting internally)
959  /// auto myHist1 = myDf.Histo1D("myColumn");
960  /// // Explicit column type
961  /// auto myHist2 = myDf.Histo1D<float>("myColumn");
962  /// ~~~
963  ///
964  template <typename V = RDFDetail::RInferredType>
966  {
967  const auto h_name = std::string(vName);
968  const auto h_title = h_name + ";" + h_name + ";count";
969  return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
970  }
971 
972  ////////////////////////////////////////////////////////////////////////////
973  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
974  /// \tparam V The type of the column used to fill the histogram.
975  /// \tparam W The type of the column used as weights.
976  /// \param[in] model The returned histogram will be constructed using this as a model.
977  /// \param[in] vName The name of the column that will fill the histogram.
978  /// \param[in] wName The name of the column that will provide the weights.
979  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
980  ///
981  /// See the description of the first Histo1D overload for more details.
982  ///
983  /// ### Example usage:
984  /// ~~~{.cpp}
985  /// // Deduce column type (this invocation needs jitting internally)
986  /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
987  /// // Explicit column type
988  /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
989  /// ~~~
990  ///
991  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
993  {
994  const std::vector<std::string_view> columnViews = {vName, wName};
995  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
996  ? ColumnNames_t()
997  : ColumnNames_t(columnViews.begin(), columnViews.end());
998  std::shared_ptr<::TH1D> h(nullptr);
999  {
1000  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1001  h = model.GetHistogram();
1002  }
1003  return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h);
1004  }
1005 
1006  ////////////////////////////////////////////////////////////////////////////
1007  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1008  /// \tparam V The type of the column used to fill the histogram.
1009  /// \tparam W The type of the column used as weights.
1010  /// \param[in] vName The name of the column that will fill the histogram.
1011  /// \param[in] wName The name of the column that will provide the weights.
1012  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1013  ///
1014  /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1015  /// The "name" and "title" strings are built starting from the input column names.
1016  /// See the description of the first Histo1D overload for more details.
1017  ///
1018  /// ### Example usage:
1019  /// ~~~{.cpp}
1020  /// // Deduce column types (this invocation needs jitting internally)
1021  /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1022  /// // Explicit column types
1023  /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1024  /// ~~~
1025  ///
1026  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1028  {
1029  // We build name and title based on the value and weight column names
1030  std::string str_vName{vName};
1031  std::string str_wName{wName};
1032  const auto h_name = str_vName + "_weighted_" + str_wName;
1033  const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1034  return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1035  }
1036 
1037  ////////////////////////////////////////////////////////////////////////////
1038  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1039  /// \tparam V The type of the column used to fill the histogram.
1040  /// \tparam W The type of the column used as weights.
1041  /// \param[in] model The returned histogram will be constructed using this as a model.
1042  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1043  ///
1044  /// This overload will use the first two default columns as column names.
1045  /// See the description of the first Histo1D overload for more details.
1046  template <typename V, typename W>
1047  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1048  {
1049  return Histo1D<V, W>(model, "", "");
1050  }
1051 
1052  ////////////////////////////////////////////////////////////////////////////
1053  /// \brief Fill and return a two-dimensional histogram (*lazy action*)
1054  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1055  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1056  /// \param[in] model The returned histogram will be constructed using this as a model.
1057  /// \param[in] v1Name The name of the column that will fill the x axis.
1058  /// \param[in] v2Name The name of the column that will fill the y axis.
1059  /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1060  ///
1061  /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1062  /// is filled with each one of the elements of the container. In case multiple columns of container type
1063  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1064  /// possibly different lengths between events).
1065  /// This action is *lazy*: upon invocation of this method the calculation is
1066  /// booked but not executed. See RResultPtr documentation.
1067  ///
1068  /// ### Example usage:
1069  /// ~~~{.cpp}
1070  /// // Deduce column types (this invocation needs jitting internally)
1071  /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1072  /// // Explicit column types
1073  /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1074  /// ~~~
1075  ///
1076  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1078  {
1079  std::shared_ptr<::TH2D> h(nullptr);
1080  {
1081  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1082  h = model.GetHistogram();
1083  }
1084  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1085  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1086  }
1087  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1088  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1089  ? ColumnNames_t()
1090  : ColumnNames_t(columnViews.begin(), columnViews.end());
1091  return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h);
1092  }
1093 
1094  ////////////////////////////////////////////////////////////////////////////
1095  /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1096  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1097  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1098  /// \tparam W The type of the column used for the weights of the histogram.
1099  /// \param[in] model The returned histogram will be constructed using this as a model.
1100  /// \param[in] v1Name The name of the column that will fill the x axis.
1101  /// \param[in] v2Name The name of the column that will fill the y axis.
1102  /// \param[in] wName The name of the column that will provide the weights.
1103  /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1104  ///
1105  /// This action is *lazy*: upon invocation of this method the calculation is
1106  /// booked but not executed. See RResultPtr documentation.
1107  /// The user gives up ownership of the model histogram.
1108  ///
1109  /// ### Example usage:
1110  /// ~~~{.cpp}
1111  /// // Deduce column types (this invocation needs jitting internally)
1112  /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1113  /// // Explicit column types
1114  /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1115  /// ~~~
1116  ///
1117  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1118  typename W = RDFDetail::RInferredType>
1121  {
1122  std::shared_ptr<::TH2D> h(nullptr);
1123  {
1124  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1125  h = model.GetHistogram();
1126  }
1127  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1128  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1129  }
1130  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1131  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1132  ? ColumnNames_t()
1133  : ColumnNames_t(columnViews.begin(), columnViews.end());
1134  return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h);
1135  }
1136 
1137  template <typename V1, typename V2, typename W>
1139  {
1140  return Histo2D<V1, V2, W>(model, "", "", "");
1141  }
1142 
1143  ////////////////////////////////////////////////////////////////////////////
1144  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1145  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1146  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1147  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1148  /// \param[in] model The returned histogram will be constructed using this as a model.
1149  /// \param[in] v1Name The name of the column that will fill the x axis.
1150  /// \param[in] v2Name The name of the column that will fill the y axis.
1151  /// \param[in] v3Name The name of the column that will fill the z axis.
1152  /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1153  ///
1154  /// This action is *lazy*: upon invocation of this method the calculation is
1155  /// booked but not executed. See RResultPtr documentation.
1156  ///
1157  /// ### Example usage:
1158  /// ~~~{.cpp}
1159  /// // Deduce column types (this invocation needs jitting internally)
1160  /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1161  /// "myValueX", "myValueY", "myValueZ");
1162  /// // Explicit column types
1163  /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1164  /// "myValueX", "myValueY", "myValueZ");
1165  /// ~~~
1166  ///
1167  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1168  typename V3 = RDFDetail::RInferredType>
1170  std::string_view v3Name = "")
1171  {
1172  std::shared_ptr<::TH3D> h(nullptr);
1173  {
1174  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1175  h = model.GetHistogram();
1176  }
1177  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1178  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1179  }
1180  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1181  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1182  ? ColumnNames_t()
1183  : ColumnNames_t(columnViews.begin(), columnViews.end());
1184  return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h);
1185  }
1186 
1187  ////////////////////////////////////////////////////////////////////////////
1188  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1189  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1190  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1191  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1192  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1193  /// \param[in] model The returned histogram will be constructed using this as a model.
1194  /// \param[in] v1Name The name of the column that will fill the x axis.
1195  /// \param[in] v2Name The name of the column that will fill the y axis.
1196  /// \param[in] v3Name The name of the column that will fill the z axis.
1197  /// \param[in] wName The name of the column that will provide the weights.
1198  /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1199  ///
1200  /// This action is *lazy*: upon invocation of this method the calculation is
1201  /// booked but not executed. See RResultPtr documentation.
1202  ///
1203  /// ### Example usage:
1204  /// ~~~{.cpp}
1205  /// // Deduce column types (this invocation needs jitting internally)
1206  /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1207  /// "myValueX", "myValueY", "myValueZ", "myWeight");
1208  /// // Explicit column types
1209  /// using d_t = double;
1210  /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1211  /// "myValueX", "myValueY", "myValueZ", "myWeight");
1212  /// ~~~
1213  ///
1214  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1215  typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1217  std::string_view v3Name, std::string_view wName)
1218  {
1219  std::shared_ptr<::TH3D> h(nullptr);
1220  {
1221  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1222  h = model.GetHistogram();
1223  }
1224  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1225  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1226  }
1227  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1228  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1229  ? ColumnNames_t()
1230  : ColumnNames_t(columnViews.begin(), columnViews.end());
1231  return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h);
1232  }
1233 
1234  template <typename V1, typename V2, typename V3, typename W>
1236  {
1237  return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1238  }
1239 
1240  ////////////////////////////////////////////////////////////////////////////
1241  /// \brief Fill and return a graph (*lazy action*)
1242  /// \tparam V1 The type of the column used to fill the x axis of the graph.
1243  /// \tparam V2 The type of the column used to fill the y axis of the graph.
1244  /// \param[in] v1Name The name of the column that will fill the x axis.
1245  /// \param[in] v2Name The name of the column that will fill the y axis.
1246  /// \return the graph wrapped in a `RResultPtr`.
1247  ///
1248  /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1249  /// is filled with each one of the elements of the container.
1250  /// If multithreading is enabled, the order in which points are inserted is undefined.
1251  /// If the graph has to be drawn, users are advised to sort it along the x axis first.
1252  /// The graph is given a name and a title based on the input column names.
1253  ///
1254  /// This action is *lazy*: upon invocation of this method the calculation is
1255  /// booked but not executed. See RResultPtr documentation.
1256  ///
1257  /// ### Example usage:
1258  /// ~~~{.cpp}
1259  /// // Deduce column types (this invocation needs jitting internally)
1260  /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1261  /// // Explicit column types
1262  /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1263  /// ~~~
1264  ///
1265  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1267  {
1268  auto graph = std::make_shared<::TGraph>();
1269  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1270  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1271  ? ColumnNames_t()
1272  : ColumnNames_t(columnViews.begin(), columnViews.end());
1273 
1274  const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1275 
1276  // We build a default name and title based on the input columns
1277  if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
1278  const auto g_name = std::string(v1Name) + "_vs_" + std::string(v2Name);
1279  const auto g_title = std::string(v1Name) + " vs " + std::string(v2Name);
1280  graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1281  graph->GetXaxis()->SetTitle(std::string(v1Name).c_str());
1282  graph->GetYaxis()->SetTitle(std::string(v2Name).c_str());
1283  }
1284 
1285  return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph);
1286  }
1287 
1288  ////////////////////////////////////////////////////////////////////////////
1289  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1290  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1291  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1292  /// \param[in] model The model to be considered to build the new return value.
1293  /// \param[in] v1Name The name of the column that will fill the x axis.
1294  /// \param[in] v2Name The name of the column that will fill the y axis.
1295  /// \return the monodimensional profile wrapped in a `RResultPtr`.
1296  ///
1297  /// This action is *lazy*: upon invocation of this method the calculation is
1298  /// booked but not executed. See RResultPtr documentation.
1299  ///
1300  /// ### Example usage:
1301  /// ~~~{.cpp}
1302  /// // Deduce column types (this invocation needs jitting internally)
1303  /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1304  /// // Explicit column types
1305  /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1306  /// ~~~
1307  ///
1308  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1311  {
1312  std::shared_ptr<::TProfile> h(nullptr);
1313  {
1314  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1315  h = model.GetProfile();
1316  }
1317 
1318  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1319  throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1320  }
1321  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1322  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1323  ? ColumnNames_t()
1324  : ColumnNames_t(columnViews.begin(), columnViews.end());
1325  return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h);
1326  }
1327 
1328  ////////////////////////////////////////////////////////////////////////////
1329  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1330  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1331  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1332  /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1333  /// \param[in] model The model to be considered to build the new return value.
1334  /// \param[in] v1Name The name of the column that will fill the x axis.
1335  /// \param[in] v2Name The name of the column that will fill the y axis.
1336  /// \param[in] wName The name of the column that will provide the weights.
1337  /// \return the monodimensional profile wrapped in a `RResultPtr`.
1338  ///
1339  /// This action is *lazy*: upon invocation of this method the calculation is
1340  /// booked but not executed. See RResultPtr documentation.
1341  ///
1342  /// ### Example usage:
1343  /// ~~~{.cpp}
1344  /// // Deduce column types (this invocation needs jitting internally)
1345  /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1346  /// // Explicit column types
1347  /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1348  /// "xValues", "yValues", "weight");
1349  /// ~~~
1350  ///
1351  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1352  typename W = RDFDetail::RInferredType>
1355  {
1356  std::shared_ptr<::TProfile> h(nullptr);
1357  {
1358  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1359  h = model.GetProfile();
1360  }
1361 
1362  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1363  throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1364  }
1365  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1366  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1367  ? ColumnNames_t()
1368  : ColumnNames_t(columnViews.begin(), columnViews.end());
1369  return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h);
1370  }
1371 
1372  template <typename V1, typename V2, typename W>
1374  {
1375  return Profile1D<V1, V2, W>(model, "", "", "");
1376  }
1377 
1378  ////////////////////////////////////////////////////////////////////////////
1379  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1380  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1381  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1382  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1383  /// \param[in] model The returned profile will be constructed using this as a model.
1384  /// \param[in] v1Name The name of the column that will fill the x axis.
1385  /// \param[in] v2Name The name of the column that will fill the y axis.
1386  /// \param[in] v3Name The name of the column that will fill the z axis.
1387  /// \return the bidimensional profile wrapped in a `RResultPtr`.
1388  ///
1389  /// This action is *lazy*: upon invocation of this method the calculation is
1390  /// booked but not executed. See RResultPtr documentation.
1391  ///
1392  /// ### Example usage:
1393  /// ~~~{.cpp}
1394  /// // Deduce column types (this invocation needs jitting internally)
1395  /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1396  /// "xValues", "yValues", "zValues");
1397  /// // Explicit column types
1398  /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1399  /// "xValues", "yValues", "zValues");
1400  /// ~~~
1401  ///
1402  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1403  typename V3 = RDFDetail::RInferredType>
1405  std::string_view v2Name = "", std::string_view v3Name = "")
1406  {
1407  std::shared_ptr<::TProfile2D> h(nullptr);
1408  {
1409  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1410  h = model.GetProfile();
1411  }
1412 
1413  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1414  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1415  }
1416  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1417  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1418  ? ColumnNames_t()
1419  : ColumnNames_t(columnViews.begin(), columnViews.end());
1420  return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h);
1421  }
1422 
1423  ////////////////////////////////////////////////////////////////////////////
1424  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1425  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1426  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1427  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1428  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1429  /// \param[in] model The returned histogram will be constructed using this as a model.
1430  /// \param[in] v1Name The name of the column that will fill the x axis.
1431  /// \param[in] v2Name The name of the column that will fill the y axis.
1432  /// \param[in] v3Name The name of the column that will fill the z axis.
1433  /// \param[in] wName The name of the column that will provide the weights.
1434  /// \return the bidimensional profile wrapped in a `RResultPtr`.
1435  ///
1436  /// This action is *lazy*: upon invocation of this method the calculation is
1437  /// booked but not executed. See RResultPtr documentation.
1438  ///
1439  /// ### Example usage:
1440  /// ~~~{.cpp}
1441  /// // Deduce column types (this invocation needs jitting internally)
1442  /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1443  /// "xValues", "yValues", "zValues", "weight");
1444  /// // Explicit column types
1445  /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1446  /// "xValues", "yValues", "zValues", "weight");
1447  /// ~~~
1448  ///
1449  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1450  typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1452  std::string_view v3Name, std::string_view wName)
1453  {
1454  std::shared_ptr<::TProfile2D> h(nullptr);
1455  {
1456  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1457  h = model.GetProfile();
1458  }
1459 
1460  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1461  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1462  }
1463  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1464  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1465  ? ColumnNames_t()
1466  : ColumnNames_t(columnViews.begin(), columnViews.end());
1467  return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h);
1468  }
1469 
1470  template <typename V1, typename V2, typename V3, typename W>
1472  {
1473  return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1474  }
1475 
1476  ////////////////////////////////////////////////////////////////////////////
1477  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1478  ///
1479  /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1480  /// as the column names passed as columnList. The arguments of `T::Fill` must have type equal to the one of the
1481  /// specified columns (these types are passed as template parameters to this method).
1482  /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1483  /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1484  /// \tparam T The type of the object to fill. Automatically deduced.
1485  /// \param[in] model The model to be considered to build the new return value.
1486  /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1487  /// \return the filled object wrapped in a `RResultPtr`.
1488  ///
1489  /// The user gives up ownership of the model object.
1490  /// The list of column names to be used for filling must always be specified.
1491  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1492  /// See RResultPtr documentation.
1493  ///
1494  /// ### Example usage:
1495  /// ~~~{.cpp}
1496  /// MyClass obj;
1497  /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
1498  /// ~~~
1499  ///
1500  template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1501  RResultPtr<T> Fill(T &&model, const ColumnNames_t &columnList)
1502  {
1503  auto h = std::make_shared<T>(std::forward<T>(model));
1504  if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1505  throw std::runtime_error("The absence of axes limits is not supported yet.");
1506  }
1507  return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h);
1508  }
1509 
1510  ////////////////////////////////////////////////////////////////////////////
1511  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1512  ///
1513  /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1514  /// method with these types. See previous overload for more information.
1515  /// \tparam T The type of the object to fill. Automatically deduced.
1516  /// \param[in] model The model to be considered to build the new return value.
1517  /// \param[in] columnList The name of the columns read to fill the object.
1518  /// \return the filled object wrapped in a `RResultPtr`.
1519  ///
1520  /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1521  /// previous overload. Check the previous overload for more details on `Fill`.
1522  ///
1523  /// ### Example usage:
1524  /// ~~~{.cpp}
1525  /// MyClass obj;
1526  /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
1527  /// ~~~
1528  ///
1529  template <typename T>
1530  RResultPtr<T> Fill(T &&model, const ColumnNames_t &columnList)
1531  {
1532  auto h = std::make_shared<T>(std::forward<T>(model));
1533  if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1534  throw std::runtime_error("The absence of axes limits is not supported yet.");
1535  }
1536  return CreateAction<RDFInternal::ActionTags::Fill, RDFDetail::RInferredType>(columnList, h, columnList.size());
1537  }
1538 
1539  ////////////////////////////////////////////////////////////////////////////
1540  /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1541  ///
1542  /// \tparam V The type of the value column
1543  /// \param[in] value The name of the column with the values to fill the statistics with.
1544  /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1545  ///
1546  /// ### Example usage:
1547  /// ~~~{.cpp}
1548  /// // Deduce column type (this invocation needs jitting internally)
1549  /// auto stats0 = myDf.Stats("values");
1550  /// // Explicit column type
1551  /// auto stats1 = myDf.Stats<float>("values");
1552  /// ~~~
1553  ///
1554  template <typename V = RDFDetail::RInferredType>
1556  {
1557  ColumnNames_t columns;
1558  if (!value.empty()) {
1559  columns.emplace_back(std::string(value));
1560  }
1561  const auto validColumnNames = GetValidatedColumnNames(1, columns);
1562  if (std::is_same<V, RDFDetail::RInferredType>::value) {
1563  return Fill(TStatistic(), validColumnNames);
1564  } else {
1565  return Fill<V>(TStatistic(), validColumnNames);
1566  }
1567  }
1568 
1569  ////////////////////////////////////////////////////////////////////////////
1570  /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1571  ///
1572  /// \tparam V The type of the value column
1573  /// \tparam W The type of the weight column
1574  /// \param[in] value The name of the column with the values to fill the statistics with.
1575  /// \param[in] weight The name of the column with the weights to fill the statistics with.
1576  /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1577  ///
1578  /// ### Example usage:
1579  /// ~~~{.cpp}
1580  /// // Deduce column types (this invocation needs jitting internally)
1581  /// auto stats0 = myDf.Stats("values", "weights");
1582  /// // Explicit column types
1583  /// auto stats1 = myDf.Stats<int, float>("values", "weights");
1584  /// ~~~
1585  ///
1586  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1588  {
1589  ColumnNames_t columns{std::string(value), std::string(weight)};
1590  constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
1591  constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
1592  const auto validColumnNames = GetValidatedColumnNames(2, columns);
1593  // We have 3 cases:
1594  // 1. Both types are inferred: we use Fill and let the jit kick in.
1595  // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
1596  // 3. Both types are explicit: we invoke the fully compiled Fill method.
1597  if (vIsInferred && wIsInferred) {
1598  return Fill(TStatistic(), validColumnNames);
1599  } else if (vIsInferred != wIsInferred) {
1600  std::string error("The ");
1601  error += vIsInferred ? "value " : "weight ";
1602  error += "column type is explicit, while the ";
1603  error += vIsInferred ? "weight " : "value ";
1604  error += " is specified to be inferred. This case is not supported: please specify both types or none.";
1605  throw std::runtime_error(error);
1606  } else {
1607  return Fill<V, W>(TStatistic(), validColumnNames);
1608  }
1609  }
1610 
1611  ////////////////////////////////////////////////////////////////////////////
1612  /// \brief Return the minimum of processed column values (*lazy action*)
1613  /// \tparam T The type of the branch/column.
1614  /// \param[in] columnName The name of the branch/column to be treated.
1615  /// \return the minimum value of the selected column wrapped in a `RResultPtr`.
1616  ///
1617  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1618  /// template specialization of this method.
1619  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1620  ///
1621  /// This action is *lazy*: upon invocation of this method the calculation is
1622  /// booked but not executed. See RResultPtr documentation.
1623  ///
1624  /// ### Example usage:
1625  /// ~~~{.cpp}
1626  /// // Deduce column type (this invocation needs jitting internally)
1627  /// auto minVal0 = myDf.Min("values");
1628  /// // Explicit column type
1629  /// auto minVal1 = myDf.Min<double>("values");
1630  /// ~~~
1631  ///
1632  template <typename T = RDFDetail::RInferredType>
1634  {
1635  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1636  using RetType_t = RDFDetail::MinReturnType_t<T>;
1637  auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1638  return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV);
1639  }
1640 
1641  ////////////////////////////////////////////////////////////////////////////
1642  /// \brief Return the maximum of processed column values (*lazy action*)
1643  /// \tparam T The type of the branch/column.
1644  /// \param[in] columnName The name of the branch/column to be treated.
1645  /// \return the maximum value of the selected column wrapped in a `RResultPtr`.
1646  ///
1647  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1648  /// template specialization of this method.
1649  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1650  ///
1651  /// This action is *lazy*: upon invocation of this method the calculation is
1652  /// booked but not executed. See RResultPtr documentation.
1653  ///
1654  /// ### Example usage:
1655  /// ~~~{.cpp}
1656  /// // Deduce column type (this invocation needs jitting internally)
1657  /// auto maxVal0 = myDf.Max("values");
1658  /// // Explicit column type
1659  /// auto maxVal1 = myDf.Max<double>("values");
1660  /// ~~~
1661  ///
1662  template <typename T = RDFDetail::RInferredType>
1664  {
1665  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1666  using RetType_t = RDFDetail::MaxReturnType_t<T>;
1667  auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1668  return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV);
1669  }
1670 
1671  ////////////////////////////////////////////////////////////////////////////
1672  /// \brief Return the mean of processed column values (*lazy action*)
1673  /// \tparam T The type of the branch/column.
1674  /// \param[in] columnName The name of the branch/column to be treated.
1675  /// \return the mean value of the selected column wrapped in a `RResultPtr`.
1676  ///
1677  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1678  /// template specialization of this method.
1679  ///
1680  /// This action is *lazy*: upon invocation of this method the calculation is
1681  /// booked but not executed. See RResultPtr documentation.
1682  ///
1683  /// ### Example usage:
1684  /// ~~~{.cpp}
1685  /// // Deduce column type (this invocation needs jitting internally)
1686  /// auto meanVal0 = myDf.Mean("values");
1687  /// // Explicit column type
1688  /// auto meanVal1 = myDf.Mean<double>("values");
1689  /// ~~~
1690  ///
1691  template <typename T = RDFDetail::RInferredType>
1693  {
1694  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1695  auto meanV = std::make_shared<double>(0);
1696  return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV);
1697  }
1698 
1699  ////////////////////////////////////////////////////////////////////////////
1700  /// \brief Return the unbiased standard deviation of processed column values (*lazy action*)
1701  /// \tparam T The type of the branch/column.
1702  /// \param[in] columnName The name of the branch/column to be treated.
1703  /// \return the standard deviation value of the selected column wrapped in a `RResultPtr`.
1704  ///
1705  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1706  /// template specialization of this method.
1707  ///
1708  /// This action is *lazy*: upon invocation of this method the calculation is
1709  /// booked but not executed. See RResultPtr documentation.
1710  ///
1711  /// ### Example usage:
1712  /// ~~~{.cpp}
1713  /// // Deduce column type (this invocation needs jitting internally)
1714  /// auto stdDev0 = myDf.StdDev("values");
1715  /// // Explicit column type
1716  /// auto stdDev1 = myDf.StdDev<double>("values");
1717  /// ~~~
1718  ///
1719  template <typename T = RDFDetail::RInferredType>
1721  {
1722  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1723  auto stdDeviationV = std::make_shared<double>(0);
1724  return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV);
1725  }
1726 
1727  // clang-format off
1728  ////////////////////////////////////////////////////////////////////////////
1729  /// \brief Return the sum of processed column values (*lazy action*)
1730  /// \tparam T The type of the branch/column.
1731  /// \param[in] columnName The name of the branch/column.
1732  /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1733  /// \return the sum of the selected column wrapped in a `RResultPtr`.
1734  ///
1735  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1736  /// template specialization of this method.
1737  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1738  ///
1739  /// This action is *lazy*: upon invocation of this method the calculation is
1740  /// booked but not executed. See RResultPtr documentation.
1741  ///
1742  /// ### Example usage:
1743  /// ~~~{.cpp}
1744  /// // Deduce column type (this invocation needs jitting internally)
1745  /// auto sum0 = myDf.Sum("values");
1746  /// // Explicit column type
1747  /// auto sum1 = myDf.Sum<double>("values");
1748  /// ~~~
1749  ///
1750  template <typename T = RDFDetail::RInferredType>
1752  Sum(std::string_view columnName = "",
1753  const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1754  {
1755  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1756  auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1757  return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV);
1758  }
1759  // clang-format on
1760 
1761  ////////////////////////////////////////////////////////////////////////////
1762  /// \brief Gather filtering statistics
1763  /// \return the resulting `RCutFlowReport` instance wrapped in a `RResultPtr`.
1764  ///
1765  /// Calling `Report` on the main `RDataFrame` object gathers stats for
1766  /// all named filters in the call graph. Calling this method on a
1767  /// stored chain state (i.e. a graph node different from the first) gathers
1768  /// the stats for all named filters in the chain section between the original
1769  /// `RDataFrame` and that node (included). Stats are gathered in the same
1770  /// order as the named filters have been added to the graph.
1771  /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1772  /// effects cuts had.
1773  ///
1774  /// This action is *lazy*: upon invocation of
1775  /// this method the calculation is booked but not executed. See RResultPtr
1776  /// documentation.
1777  ///
1778  /// ### Example usage:
1779  /// ~~~{.cpp}
1780  /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
1781  /// auto cutReport = filtered.Report();
1782  /// cutReport->Print();
1783  /// ~~~
1784  ///
1786  {
1787  bool returnEmptyReport = false;
1788  // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1789  // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1790  // certainly does not contain named filters.
1791  // The number 4 takes into account the implicit columns for entry and slot number
1792  // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
1793  if (std::is_same<Proxied, RLoopManager>::value && fCustomColumns.GetNames().size() > 4)
1794  returnEmptyReport = true;
1795 
1796  auto rep = std::make_shared<RCutFlowReport>();
1797  using Helper_t = RDFInternal::ReportHelper<Proxied>;
1798  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1799 
1800  auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
1802 
1803  fLoopManager->Book(action.get());
1804  return MakeResultPtr(rep, *fLoopManager, std::move(action));
1805  }
1806 
1807  /////////////////////////////////////////////////////////////////////////////
1808  /// \brief Returns the names of the available columns
1809  /// \return the container of column names.
1810  ///
1811  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1812  ///
1813  /// ### Example usage:
1814  /// ~~~{.cpp}
1815  /// auto colNames = d.GetColumnNames();
1816  /// // Print columns' names
1817  /// for (auto &&colName : colNames) std::cout << colName << std::endl;
1818  /// ~~~
1819  ///
1821  {
1822  ColumnNames_t allColumns;
1823 
1824  auto addIfNotInternal = [&allColumns](std::string_view colName) {
1825  if (!RDFInternal::IsInternalColumn(colName))
1826  allColumns.emplace_back(colName);
1827  };
1828 
1829  auto columnNames = fCustomColumns.GetNames();
1830 
1831  std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
1832 
1833  auto tree = fLoopManager->GetTree();
1834  if (tree) {
1835  auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
1836  allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1837  }
1838 
1839  if (fDataSource) {
1840  auto &dsColNames = fDataSource->GetColumnNames();
1841  allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1842  }
1843 
1844  return allColumns;
1845  }
1846 
1847  /////////////////////////////////////////////////////////////////////////////
1848  /// \brief Return the type of a given column as a string.
1849  /// \return the type of the required column.
1850  ///
1851  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1852  ///
1853  /// ### Example usage:
1854  /// ~~~{.cpp}
1855  /// auto colType = d.GetColumnType("columnName");
1856  /// // Print column type
1857  /// std::cout << "Column columnName has type " << colType << std::endl;
1858  /// ~~~
1859  ///
1860  std::string GetColumnType(std::string_view column)
1861  {
1862  const auto col = std::string(column);
1863  const bool convertVector2RVec = true;
1864  RDFDetail::RCustomColumnBase *customCol =
1865  fCustomColumns.HasName(column) ? fCustomColumns.GetColumns().at(col).get() : nullptr;
1867  customCol, convertVector2RVec);
1868  }
1869 
1870  /// \brief Returns the names of the filters created.
1871  /// \return the container of filters names.
1872  ///
1873  /// If called on a root node, all the filters in the computation graph will
1874  /// be printed. For any other node, only the filters upstream of that node.
1875  /// Filters without a name are printed as "Unnamed Filter"
1876  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1877  ///
1878  /// ### Example usage:
1879  /// ~~~{.cpp}
1880  /// auto filtNames = d.GetFilterNames();
1881  /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
1882  /// ~~~
1883  ///
1884  std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
1885 
1886  /// \brief Returns the names of the defined columns
1887  /// \return the container of the defined column names.
1888  ///
1889  /// This is not an action nor a transformation, just a simple utility to
1890  /// get the columns names that have been defined up to the node.
1891  /// If no custom column has been defined, e.g. on a root node, it returns an
1892  /// empty collection.
1893  ///
1894  /// ### Example usage:
1895  /// ~~~{.cpp}
1896  /// auto defColNames = d.GetDefinedColumnNames();
1897  /// // Print defined columns' names
1898  /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
1899  /// ~~~
1900  ///
1902  {
1903  ColumnNames_t definedColumns;
1904 
1905  auto columns = fCustomColumns.GetColumns();
1906 
1907  for (auto column : columns) {
1908  if (!RDFInternal::IsInternalColumn(column.first) && !column.second->IsDataSourceColumn())
1909  definedColumns.emplace_back(column.first);
1910  }
1911 
1912  return definedColumns;
1913  }
1914 
1915  /// \brief Checks if a column is present in the dataset
1916  /// \return true if the column is available, false otherwise
1917  ///
1918  /// This method checks if a column is part of the input ROOT dataset, has
1919  /// been defined or can be provided by the data source.
1920  ///
1921  /// Example usage:
1922  /// ~~~{.cpp}
1923  /// ROOT::RDataFrame base(1);
1924  /// auto rdf = base.Define("definedColumn", [](){return 0;});
1925  /// rdf.HasColumn("definedColumn"); // true: we defined it
1926  /// rdf.HasColumn("rdfentry_"); // true: it's always there
1927  /// rdf.HasColumn("foo"); // false: it is not there
1928  /// ~~~
1929  bool HasColumn(std::string_view columnName)
1930  {
1931  if (fCustomColumns.HasName(columnName))
1932  return true;
1933 
1934  if (auto tree = fLoopManager->GetTree()) {
1935  const auto &branchNames = fLoopManager->GetBranchNames();
1936  const auto branchNamesEnd = branchNames.end();
1937  if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
1938  return true;
1939  }
1940 
1941  if (fDataSource && fDataSource->HasColumn(columnName))
1942  return true;
1943 
1944  return false;
1945  }
1946 
1947  /// \brief Gets the number of data processing slots
1948  /// \return The number of data processing slots used by this RDataFrame instance
1949  ///
1950  /// This method returns the number of data processing slots used by this RDataFrame
1951  /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
1952  ///
1953  /// Example usage:
1954  /// ~~~{.cpp}
1955  /// ROOT::EnableImplicitMT(6)
1956  /// ROOT::RDataFrame df(1);
1957  /// std::cout << df.GetNSlots() << std::endl; // prints "6"
1958  /// ~~~
1959  unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
1960 
1961  /// \brief Gets the number of event loops run
1962  /// \return The number of event loops run by this RDataFrame instance
1963  ///
1964  /// This method returns the number of events loops run so far by this RDataFrame instance.
1965  ///
1966  /// Example usage:
1967  /// ~~~{.cpp}
1968  /// ROOT::RDataFrame df(1);
1969  /// std::cout << df.GetNRuns() << std::endl; // prints "0"
1970  /// df.Sum("rdfentry_").GetValue(); // trigger the event loop
1971  /// std::cout << df.GetNRuns() << std::endl; // prints "1"
1972  /// df.Sum("rdfentry_").GetValue(); // trigger another event loop
1973  /// std::cout << df.GetNRuns() << std::endl; // prints "2"
1974  /// ~~~
1975  unsigned int GetNRuns() const { return fLoopManager->GetNRuns(); }
1976 
1977  // clang-format off
1978  ////////////////////////////////////////////////////////////////////////////
1979  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1980  /// \tparam F The type of the aggregator callable. Automatically deduced.
1981  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1982  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1983  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
1984  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1985  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1986  /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
1987  /// \return the result of the aggregation wrapped in a `RResultPtr`.
1988  ///
1989  /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
1990  /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
1991  /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
1992  /// the value of the column columnName.
1993  /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
1994  /// Otherwise the signature of aggregator must be `void(U&,T)`.
1995  ///
1996  /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
1997  /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
1998  /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
1999  ///
2000  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2001  ///
2002  /// Example usage:
2003  /// ~~~{.cpp}
2004  /// auto aggregator = [](double acc, double x) { return acc * x; };
2005  /// ROOT::EnableImplicitMT();
2006  /// // If multithread is enabled, the aggregator function will be called by more threads
2007  /// // and will produce a vector of partial accumulators.
2008  /// // The merger function performs the final aggregation of these partial results.
2009  /// auto merger = [](std::vector<double> &accumulators) {
2010  /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2011  /// accumulators[0] *= accumulators[i];
2012  /// }
2013  /// };
2014  ///
2015  /// // The accumulator is initialized at this value by every thread.
2016  /// double initValue = 1.;
2017  ///
2018  /// // Multiplies all elements of the column "x"
2019  /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2020  /// ~~~
2021  // clang-format on
2022  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2023  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2024  typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2025  typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2026  typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2027  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2028  {
2029  RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2030  const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2031  constexpr auto nColumns = ArgTypes::list_size;
2032 
2033  const auto validColumnNames = GetValidatedColumnNames(1, columns);
2034 
2035  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ArgTypes());
2036 
2037  auto accObjPtr = std::make_shared<U>(aggIdentity);
2038  using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2039  using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
2040  auto action = std::make_unique<Action_t>(
2041  Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2042  fProxiedPtr, std::move(newColumns));
2043  fLoopManager->Book(action.get());
2044  return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2045  }
2046 
2047  // clang-format off
2048  ////////////////////////////////////////////////////////////////////////////
2049  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
2050  /// \tparam F The type of the aggregator callable. Automatically deduced.
2051  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2052  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2053  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable
2054  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2055  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2056  /// \return the result of the aggregation wrapped in a `RResultPtr`.
2057  ///
2058  /// See previous Aggregate overload for more information.
2059  // clang-format on
2060  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2061  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2062  typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2063  typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2064  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2065  {
2066  static_assert(
2067  std::is_default_constructible<U>::value,
2068  "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2069  return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2070  }
2071 
2072  // clang-format off
2073  ////////////////////////////////////////////////////////////////////////////
2074  /// \brief Book execution of a custom action using a user-defined helper object.
2075  /// \tparam ColumnTypes List of types of columns used by this action.
2076  /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2077  /// \param[in] helper The Action Helper to be scheduled.
2078  /// \param[in] columns The names of the columns on which the helper acts.
2079  /// \return the result of the helper wrapped in a `RResultPtr`.
2080  ///
2081  /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2082  /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2083  /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2084  ///
2085  /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2086  /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2087  /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2088  /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2089  /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2090  /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2091  /// the requested columns for the particular entry being processed.
2092  /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2093  /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2094  /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2095  /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
2096  /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2097  /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2098  /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2099  /// method concurrently, but will always pass different 'slot' numbers.
2100  /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2101  /// Result_t). The RResultPtr returned by Book will point to this object.
2102  ///
2103  /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2104  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2105  // clang-format on
2106  template <typename... ColumnTypes, typename Helper>
2107  RResultPtr<typename Helper::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
2108  {
2109  constexpr auto nColumns = sizeof...(ColumnTypes);
2110  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columns.size());
2111 
2112  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2113 
2114  // TODO add more static sanity checks on Helper
2115  using AH = RDFDetail::RActionImpl<Helper>;
2116  static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
2117  "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2118 
2119  using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>;
2120  auto resPtr = helper.GetResultPtr();
2121 
2122  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2124 
2125  auto action = std::make_unique<Action_t>(Helper(std::forward<Helper>(helper)), validColumnNames, fProxiedPtr,
2126  RDFInternal::RBookedCustomColumns(newColumns));
2127  fLoopManager->Book(action.get());
2128  return MakeResultPtr(resPtr, *fLoopManager, std::move(action));
2129  }
2130 
2131  ////////////////////////////////////////////////////////////////////////////
2132  /// \brief Provides a representation of the columns in the dataset
2133  /// \tparam ColumnTypes variadic list of branch/column types.
2134  /// \param[in] columnList Names of the columns to be displayed.
2135  /// \param[in] nRows Number of events for each column to be displayed.
2136  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2137  ///
2138  /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2139  /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2140  /// complete version through `AsString()`.
2141  ///
2142  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2143  ///
2144  /// Example usage:
2145  /// ~~~{.cpp}
2146  /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2147  /// auto d1 = rdf.Display("");
2148  /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2149  /// auto d2 = d.Display({"x", "y"}, 128);
2150  /// // Printing the short representations, the event loop will run
2151  /// d1->Print();
2152  /// d2->Print();
2153  /// ~~~
2154  template <typename... ColumnTypes>
2155  RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2156  {
2157  CheckIMTDisabled("Display");
2158 
2159  auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2160  return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer);
2161  }
2162 
2163  ////////////////////////////////////////////////////////////////////////////
2164  /// \brief Provides a representation of the columns in the dataset
2165  /// \param[in] columnList Names of the columns to be displayed.
2166  /// \param[in] nRows Number of events for each column to be displayed.
2167  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2168  ///
2169  /// This overload automatically infers the column types.
2170  /// See the previous overloads for further details.
2171  RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2172  {
2173  CheckIMTDisabled("Display");
2174  auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2175  return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer,
2176  columnList.size());
2177  }
2178 
2179  ////////////////////////////////////////////////////////////////////////////
2180  /// \brief Provides a representation of the columns in the dataset
2181  /// \param[in] columnNameRegexp A regular expression to select the columns.
2182  /// \param[in] nRows Number of events for each column to be displayed.
2183  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2184  ///
2185  /// The existing columns are matched against the regular expression. If the string provided
2186  /// is empty, all columns are selected.
2187  /// See the previous overloads for further details.
2188  RResultPtr<RDisplay> Display(std::string_view columnNameRegexp = "", const int &nRows = 5)
2189  {
// NOTE(review): listing line 2190 is missing from this extract. It must declare `selectedColumns`
// and open the call whose trailing arguments appear on the next line — restore it from the
// upstream header before compiling.
2191  columnNameRegexp, "Display");
// Hand the selected column names and nRows to another Display overload.
2192  return Display(selectedColumns, nRows);
2193  }
2194 
2195  ////////////////////////////////////////////////////////////////////////////
2196  /// \brief Provides a representation of the columns in the dataset
2197  /// \param[in] columnList Names of the columns to be displayed.
2198  /// \param[in] nRows Number of events for each column to be displayed.
2199  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2200  ///
2201  /// See the previous overloads for further details.
2202  RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, const int &nRows = 5)
2203  {
2204  ColumnNames_t selectedColumns(columnList);
2205  return Display(selectedColumns, nRows);
2206  }
2207 
2208 private:
2210  {
2212 
2213  // Entry number column
2214  const std::string entryColName = "rdfentry_";
2215  const std::string entryColType = "ULong64_t";
2216  auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
2217  using NewColEntry_t =
2218  RDFDetail::RCustomColumn<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
2219 
2220  auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
2221  ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2222  newCols.AddName(entryColName);
2223  newCols.AddColumn(entryColumn, entryColName);
2224 
2225  // Slot number column
2226  const std::string slotColName = "rdfslot_";
2227  const std::string slotColType = "unsigned int";
2228  auto slotColGen = [](unsigned int slot) { return slot; };
2229  using NewColSlot_t = RDFDetail::RCustomColumn<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
2230 
2231  auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
2232  fLoopManager->GetNSlots(), newCols);
2233  newCols.AddName(slotColName);
2234  newCols.AddColumn(slotColumn, slotColName);
2235 
2236  fCustomColumns = std::move(newCols);
2237 
2238  fLoopManager->AddColumnAlias("tdfentry_", entryColName);
2239  fCustomColumns.AddName("tdfentry_");
2240  fLoopManager->AddColumnAlias("tdfslot_", slotColName);
2241  fCustomColumns.AddName("tdfslot_");
2242  }
2243 
2244  std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
2245  {
2246  std::vector<std::string> types;
2247 
2248  for (auto column : columnList) {
2249  types.push_back(GetColumnType(column));
2250  }
2251  return types;
2252  }
2253 
2255  {
2256  if (ROOT::IsImplicitMTEnabled()) {
2257  std::string error(callerName);
2258  error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
2259  throw std::runtime_error(error);
2260  }
2261  }
2262 
2263  // Type was specified by the user, no need to infer it
2264  template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2265  typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2266  RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
2267  {
2268  constexpr auto nColumns = sizeof...(BranchTypes);
2269 
2270  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2271 
2272  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2274 
2275  const auto nSlots = fLoopManager->GetNSlots();
2276 
2277  auto action = RDFInternal::BuildAction<BranchTypes...>(validColumnNames, r, nSlots, fProxiedPtr, ActionTag{},
2278  std::move(newColumns));
2279  fLoopManager->Book(action.get());
2280  return MakeResultPtr(r, *fLoopManager, std::move(action));
2281  }
2282 
2283  // User did not specify type, do type inference
2284  // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
2285  // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
2286  template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2287  typename std::enable_if<RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2289  CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
2290  {
2291  auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
2292 
2293  const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
2294  const unsigned int nSlots = fLoopManager->GetNSlots();
2295 
2296  auto tree = fLoopManager->GetTree();
2297  auto rOnHeap = RDFInternal::MakeWeakOnHeap(r);
2298 
2299  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2300  using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
2301  RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
2302 
2303  const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager);
2304  auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
2305 
2306  auto toJit = RDFInternal::JitBuildAction(validColumnNames, upcastNodeOnHeap,
2307  typeid(std::weak_ptr<ActionResultType>), typeid(ActionTag), rOnHeap,
2308  tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap);
2309  fLoopManager->Book(jittedAction.get());
2310  fLoopManager->ToJitExec(toJit);
2311  return MakeResultPtr(r, *fLoopManager, jittedAction);
2312  }
2313 
// Implementation shared by the Define-like methods for callables whose return type is
// default-constructible (see the enable_if below). Creates a new column called `name`, evaluated
// by `expression` on the columns listed in `columns`, and returns a new RInterface on the same
// node that carries the enlarged set of booked columns.
2314  template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2315  typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
2316  DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
2317  {
// NOTE(review): listing lines 2318-2320 are missing from this extract — restore them from the
// upstream header.
2321 
// Column types are the callable's argument types, minus the leading slot argument
// (CustomColExtraArgs::Slot) or the leading slot and entry arguments (CustomColExtraArgs::SlotAndEntry).
2322  using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2323  using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2324  std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
2325  using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2326  std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
2327 
2328  constexpr auto nColumns = ColTypes_t::list_size;
2329 
2330  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2331 
2332  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
2333 
2334  // Declare return type to the interpreter, for future use by jitted actions
2335  auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2336  if (retTypeName.empty()) {
2337  // The type is not known to the interpreter.
2338  // We must not error out here: a placeholder type name is recorded instead (presumably the
// failure is reported if/when this column is used in jitted code — confirm).
2339  const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2340  retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2341  }
2342 
// NOTE(review): listing line 2343, which must declare the `NewCol_t` alias used below, is missing
// from this extract.
2344  RDFInternal::RBookedCustomColumns newCols(newColumns);
2345  auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
2346  fLoopManager->GetNSlots(), newCols);
2347 
2348  newCols.AddName(name);
2349  newCols.AddColumn(newColumn, name);
2350 
// The new interface proxies the same node; only the set of booked columns differs.
2351  RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
2352 
2353  return newInterface;
2354  }
2355 
2356  // This overload is chosen when the type returned by the callable passed to Define or DefineSlot is not
// default-constructible (e.g. void) and the callable is not convertible to std::string.
2357  // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2358  // this way compilation of `Define` has no way to continue after throwing the error.
2359  template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2360  bool IsFStringConv = std::is_convertible<F, std::string>::value,
2361  bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2362  typename std::enable_if<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>::type
// NOTE(review): listing line 2363 (the function name and parameter list) is missing from this
// extract — restore it from the upstream header.
2364  {
2365  static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2366  "Error in `Define`: type returned by expression is not default-constructible");
2367  return *this; // never reached
2368  }
2369 
2370  ////////////////////////////////////////////////////////////////////////////
2371  /// \brief Implementation of snapshot
2372  /// \param[in] treename The name of the TTree
2373  /// \param[in] filename The name of the TFile
2374  /// \param[in] columnList The list of names of the branches to be written
/// \param[in] options The snapshot options; passed on to the snapshot helper, and `options.fLazy` is
/// passed on to RDFInternal::CreateSnapshotRDF.
2375  /// The implementation exploits Foreach. The association of the addresses to
2376  /// the branches takes place at the first event. This is possible because
2377  /// since there are no copies, the address of the value passed by reference
2378  /// is the address pointing to the storage of the read/created object in/by
2379  /// the TTreeReaderValue/TemporaryBranch
/// NOTE(review): the body below books a SnapshotHelper/SnapshotHelperMT action and does not call
/// Foreach itself — confirm whether the paragraph above is still accurate.
2380  template <typename... ColumnTypes>
// NOTE(review): listing line 2381 (return type, function name and the first parameters, including
// `treename` and `filename`) is missing from this extract.
2382  const ColumnNames_t &columnList, const RSnapshotOptions &options)
2383  {
2384  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size());
2385 
2386  const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
2387 
2388  auto newColumns = CheckAndFillDSColumns(validCols, std::index_sequence_for<ColumnTypes...>(),
// NOTE(review): listing line 2389 (the last argument and the end of the statement above) is missing
// from this extract.
2390 
// Keep the full name: `treename` may be shortened below.
2391  const std::string fullTreename(treename);
2392  // split name into directory and treename if needed
2393  const auto lastSlash = treename.rfind('/');
2394  std::string_view dirname = "";
2395  if (std::string_view::npos != lastSlash) {
2396  dirname = treename.substr(0, lastSlash);
2397  treename = treename.substr(lastSlash + 1, treename.size());
2398  }
2399 
2400  // add action node to functional graph and run event loop
2401  std::unique_ptr<RDFInternal::RActionBase> actionPtr;
2402  if (!ROOT::IsImplicitMTEnabled()) {
2403  // single-thread snapshot
2404  using Helper_t = RDFInternal::SnapshotHelper<ColumnTypes...>;
2405  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2406  actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
2407  fProxiedPtr, std::move(newColumns)));
2408  } else {
2409  // multi-thread snapshot
2410  using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>;
2411  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2412  actionPtr.reset(new Action_t(
2413  Helper_t(fLoopManager->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
2414  fProxiedPtr, std::move(newColumns)));
2415  }
2416 
2417  fLoopManager->Book(actionPtr.get());
2418 
// The action is handed over to CreateSnapshotRDF together with the original, unsplit tree name.
2419  return RDFInternal::CreateSnapshotRDF(validCols, fullTreename, filename, options.fLazy, *fLoopManager,
2420  std::move(actionPtr));
2421  }
2422 
2423  ////////////////////////////////////////////////////////////////////////////
2424  /// \brief Implementation of cache
2425  template <typename... BranchTypes, std::size_t... S>
2426  RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...> s)
2427  {
2428  // Check at compile time that the columns types are copy constructible
2429  constexpr bool areCopyConstructible =
2430  RDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
2431  static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2432 
2433  // We share bits and pieces with snapshot. De facto this is a snapshot
2434  // in memory!
2435  RDFInternal::CheckTypesAndPars(sizeof...(BranchTypes), columnList.size());
2436 
2437  auto colHolders = std::make_tuple(Take<BranchTypes>(columnList[S])...);
2438  auto ds = std::make_unique<RLazyDS<BranchTypes...>>(std::make_pair(columnList[S], std::get<S>(colHolders))...);
2439 
2440  RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnList));
2441 
2442  (void)s; // Prevents unused warning
2443 
2444  return cachedRDF;
2445  }
2446 
2447 protected:
2448  RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
2449  const RDFInternal::RBookedCustomColumns &columns, RDataSource *ds)
2450  : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fCustomColumns(columns)
2451  {
2452  }
2453 
2455 
2456  const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2457 
2458  /// Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,
2459  /// which is expensive in terms of runtime, is called at most once.
/// \param[in] nColumns The number of columns required by the caller.
/// \param[in] columns The column names provided by the caller.
/// \return The validated column names.
2460  ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2461  {
// NOTE(review): listing line 2462 (the start of the return statement, whose last argument appears
// on the next line) is missing from this extract — restore it from the upstream header.
2463  fDataSource);
2464  }
2465 
// With a data source present, delegates to RDFInternal::AddDSColumns for the columns in `validCols`;
// without one, returns fCustomColumns as is.
2466  template <typename... ColumnTypes, std::size_t... S>
// NOTE(review): listing lines 2467-2468 (return type, function name and parameter list) are missing
// from this extract — restore them from the upstream header.
2469  {
2470  return fDataSource ? RDFInternal::AddDSColumns(validCols, fCustomColumns, *fDataSource, fLoopManager->GetNSlots(),
2471  std::index_sequence_for<ColumnTypes...>(),
// NOTE(review): listing line 2472 (the last argument of AddDSColumns and its closing parenthesis)
// is missing from this extract.
2473  : fCustomColumns;
2474  }
2475 };
2476 
2477 } // namespace RDF
2478 
2479 } // namespace ROOT
2480 
2481 #endif // ROOT_RDF_TINTERFACE
ROOT::Detail::RDF::RFilterBase
Definition: RFilterBase.hxx:36
ROOT::Internal::RDF::InterpreterCalc
Long64_t InterpreterCalc(const std::string &code, const std::string &context)
Definition: RDFUtils.cxx:312
ROOT::RDF::RInterface::Define
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Creates a custom column.
Definition: RInterface.hxx:370
ROOT::RDF::RInterface::fLoopManager
RLoopManager * fLoopManager
Definition: RInterface.hxx:103
ROOT::Detail::RDF::RLoopManager::Run
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
Definition: RLoopManager.cxx:555
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
Definition: RInterface.hxx:1027
GraphCreatorHelper
Helper class that provides the operation graph nodes.
ROOT::RDF::RInterface::Histo3D
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
Definition: RInterface.hxx:1235
ROOT::TypeTraits
ROOT type_traits extensions.
Definition: TypeTraits.hxx:27
ROOT::RDF::RInterface::Display
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
Definition: RInterface.hxx:2171
ROOT::RDF::RInterface::AddDefaultColumns
void AddDefaultColumns()
Definition: RInterface.hxx:2209
ROOT::RDF::RInterface::RInterface
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>
Definition: RInterface.hxx:126
ROOT::RDF::RInterface::Display
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", const int &nRows=5)
Provides a representation of the columns in the dataset.
Definition: RInterface.hxx:2188
ROOT::RDF::RDataSource::GetColumnNames
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
f
#define f(i)
Definition: RSha256.hxx:122
ROOT::RDF::RInterface::Profile1D
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Definition: RInterface.hxx:1373
ROOT::Internal::RDF::CheckTypesAndPars
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
Definition: RDFInterfaceUtils.cxx:457
RBookedCustomColumns.hxx
RSnapshotOptions.hxx
ROOT::Detail::RDF::RCustomColumn
Definition: RCustomColumn.hxx:44
ROOT::RDF::RInterface::Histo2D
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
Definition: RInterface.hxx:1138
ROOT::RDF::RInterface::CreateAction
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
Definition: RInterface.hxx:2289
ROOT::DisableImplicitMT
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:542
ROOT::RDF::RInterface::Fill
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action)
Definition: RInterface.hxx:1530
F
#define F(x, y, z)
ROOT::RDF::RInterface::Sum
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
Definition: RInterface.hxx:1752
tree
Definition: tree.py:1
ROOT::Internal::RDF::RBookedCustomColumns::AddColumn
void AddColumn(const std::shared_ptr< RDFDetail::RCustomColumnBase > &column, std::string_view name)
Internally it recreates the map with the new column, and swaps with the old one.
Definition: RDFBookedCustomColumns.cxx:13
ROOT::RDF::RInterface::DefineSlotEntry
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:353
ROOT::RDF::RInterface::StdDev
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action)
Definition: RInterface.hxx:1720
ROOT::RDF::RInterface::GetLoopManager
RLoopManager * GetLoopManager() const
Definition: RInterface.hxx:2454
ROOT::RDF::RInterface::Mean
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
Definition: RInterface.hxx:1692
ROOT::RDF::RInterface::Profile2D
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
Definition: RInterface.hxx:1404
r
ROOT::R::TRInterface & r
Definition: Object.C:4
ROOT::RDF::RInterface::Filter
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:248
ROOT::RDF::RInterface::GetColumnTypeNamesList
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
Definition: RInterface.hxx:2244
ROOT::RDF::RInterface::Histo2D
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
Definition: RInterface.hxx:1077
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:785
ROOT::RDF::RInterface::fDataSource
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:105
ROOT::Internal::RDF::GetValidatedArgTypes
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RBookedCustomColumns &customColumns, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
Definition: RDFInterfaceUtils.cxx:731
ROOT::RDF::TH2DModel
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:45
ROOT::RDF::RInterface::Range
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end)
Definition: RInterface.hxx:690
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
Definition: RInterface.hxx:1047
ROOT::Internal::RDF::SnapshotHelperMT
Definition: RAction.hxx:285
ROOT::Internal::RDF::ColumnName2ColumnTypeName
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, RDataSource *ds, RCustomColumnBase *customColumn, bool vector2rvec)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:211
ROOT::TypeTraits::TypeList
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:37
ROOT::RDF::RInterface::Filter
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:228
Utils.hxx
kError
const Int_t kError
Definition: TError.h:48
ROOT::Detail::RDF::RRangeBase
Definition: RRangeBase.hxx:38
ROOT::Internal::RDF::CheckCustomColumn
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &dataSourceColumns)
Definition: RDFInterfaceUtils.cxx:416
TGeant4Unit::s
static constexpr double s
Definition: TGeant4SystemOfUnits.h:168
InterfaceUtils.hxx
ROOT::RDF::RInterface::DefineSlot
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
Definition: RInterface.hxx:323
ROOT::RDF::RInterface::Max
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
Definition: RInterface.hxx:1663
ROOT::RDF::RInterface::GetFilterNames
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
Definition: RInterface.hxx:1884
ROOT::RDF::RInterface::Graph
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action)
Definition: RInterface.hxx:1266
ROOT::Internal::RDF::CreateSnapshotRDF
HeadNode_t CreateSnapshotRDF(const ColumnNames_t &validCols, std::string_view treeName, std::string_view fileName, bool isLazy, RLoopManager &loopManager, std::unique_ptr< RDFInternal::RActionBase > actionPtr)
Definition: RDFInterfaceUtils.cxx:329
ROOT::Detail::RDF::RRange
Definition: RRange.hxx:42
RooFitShortHand::S
RooArgSet S(const RooAbsArg &v1)
Definition: RooGlobalFunc.cxx:348
ROOT::RDF::RSnapshotOptions
A collection of options to steer the creation of the dataset on file.
Definition: RSnapshotOptions.hxx:34
ROOT::RDF::RInterface::Filter
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:187
ActionHelpers.hxx
ROOT::Internal::RDF::BookFilterJit
void BookFilterJit(const std::shared_ptr< RJittedFilter > &jittedFilter, std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const RDFInternal::RBookedCustomColumns &customCols, TTree *tree, RDataSource *ds)
Definition: RDFInterfaceUtils.cxx:538
TStatistic.h
ROOT::Internal::RDF::JitBuildAction
std::string JitBuildAction(const ColumnNames_t &bl, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree, const unsigned int nSlots, const RDFInternal::RBookedCustomColumns &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
Definition: RDFInterfaceUtils.cxx:629
ROOT::RDF::TProfile1DModel
A struct which stores the parameters of a TProfile.
Definition: HistoModels.hxx:99
TProfile.h
ROOT::RDF::RInterface::Profile2D
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
Definition: RInterface.hxx:1451
ROOT::RDataFrame
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
ROOT::RDF::RInterface::Display
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
Definition: RInterface.hxx:2202
ROOT::Detail::RDF::RLoopManager::GetDataSource
RDataSource * GetDataSource() const
Definition: RLoopManager.hxx:149
ROOT::RDF::RNode
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
Definition: InterfaceUtils.hxx:54
ROOT::Detail::RDF::RLoopManager::ToJitExec
void ToJitExec(const std::string &) const
Definition: RLoopManager.cxx:637
ROOT::RDF::RInterface::ForeachSlot
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action)
Definition: RInterface.hxx:762
ROOT::RDF::RResultPtr
Smart pointer for the return type of actions.
Definition: InterfaceUtils.hxx:51
RRange.hxx
ROOT::RDF::RInterface::Histo3D
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
Definition: RInterface.hxx:1169
ROOT::Detail::RDF
Definition: GraphUtils.hxx:40
ROOT::RDF::RInterface::Filter
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:212
ROOT::RDF::RInterface::Reduce
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:835
ROOT::RDF::RDataSource
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Definition: RDataSource.hxx:105
ROOT::RDF::RInterface::Display
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
Definition: RInterface.hxx:2155
ROOT::Internal::RDF::GetValidatedColumnNames
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
Definition: RDFInterfaceUtils.cxx:696
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:928
Rgl::Range_t
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1195
ROOT::EnableImplicitMT
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:525
RDataSource.hxx
TProfile2D.h
ROOT::RDF::RLazyDS
An RDataSource implementation built on top of result proxies.
Definition: RLazyDSImpl.hxx:53
ROOT::Detail::RDF::MakeResultPtr
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create an RResultPtr and set its pointer to the corresponding RAction. This overload is invoked by non-...
Definition: RResultPtr.hxx:354
ROOT::RDF::TH3DModel
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:70
ROOT::RDF::RInterface::GetNRuns
unsigned int GetNRuns() const
Gets the number of event loops run.
Definition: RInterface.hxx:1975
RLazyDSImpl.hxx
RIntegerSequence.hxx
ROOT::Internal::RDF::SnapshotHelper
Definition: RAction.hxx:282
ROOT::RDF::RInterface::fCustomColumns
RDFInternal::RBookedCustomColumns fCustomColumns
Contains the custom columns defined up to this node.
Definition: RInterface.hxx:108
ROOT::RDF::RInterface::Count
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action)
Definition: RInterface.hxx:853
ROOT::Detail::RDF::RLoopManager::GetBranchNames
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
Definition: RLoopManager.cxx:688
ROOT::Internal::RDF::RAction
An action node in a RDF computation graph.
Definition: RAction.hxx:115
h
#define h(i)
Definition: RSha256.hxx:124
ROOT::RDF::RSnapshotOptions::fLazy
bool fLazy
Do not start the event loop when Snapshot is called.
Definition: RSnapshotOptions.hxx:56
ROOT::RDF::RInterface::Take
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default)
Definition: RInterface.hxx:886
ROOT::Detail::RDF::RLoopManager::GetTree
TTree * GetTree() const
Definition: RLoopManager.cxx:583
ROOT::RDF::RInterface::Snapshot
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:558
RStringView.hxx
ROOT::RDF::RInterface::CheckIMTDisabled
void CheckIMTDisabled(std::string_view callerName)
Definition: RInterface.hxx:2254
ROOT::RDF::RInterface< RDFDetail::RLoopManager >::DS_t
DataSource DS_t
Definition: RInterface.hxx:90
ROOT::RDF::RInterface::GetProxiedPtr
const std::shared_ptr< Proxied > & GetProxiedPtr() const
Definition: RInterface.hxx:2456
ROOT::Internal::RDF::PrettyPrintAddr
std::string PrettyPrintAddr(const void *const addr)
Definition: RDFInterfaceUtils.cxx:530
ROOT::Internal::RDF::RBookedCustomColumns
Encapsulates the columns defined by the user.
Definition: RBookedCustomColumns.hxx:45
TypeTraits.hxx
ROOT::RDF::RInterface::DefineImpl
std::enable_if< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
Definition: RInterface.hxx:2316
ROOT::RDF::RInterface::fProxiedPtr
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:101
ROOT::Detail::RDF::RLoopManager::Book
void Book(RDFInternal::RActionBase *actionPtr)
Definition: RLoopManager.cxx:588
UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:46
ROOT::RDF::RInterface< RDFDetail::RLoopManager >::ColumnNames_t
RDFDetail::ColumnNames_t ColumnNames_t
Definition: RInterface.hxx:91
ROOT::RDF::RInterface::Cache
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:597
ROOT::Internal::RDF::BookDefineJit
std::shared_ptr< RJittedCustomColumn > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RDFInternal::RBookedCustomColumns &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Definition: RDFInterfaceUtils.cxx:584
ROOT::RDF::RInterface::Snapshot
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:532
ROOT::RDF::RInterface::Histo2D
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
Definition: RInterface.hxx:1120
ROOT::RDF::RInterface::SnapshotImpl
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
Implementation of snapshot.
Definition: RInterface.hxx:2381
ROOT::Internal::RDF::IsInternalColumn
bool IsInternalColumn(std::string_view colName)
Definition: RDFInterfaceUtils.cxx:516
TH2.h
void
typedef void((*Func_t)())
ROOT::RDF::RInterface
The public interface to the RDataFrame federation of classes.
Definition: InterfaceUtils.hxx:53
ROOT::RDF::RInterface::Stats
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action)
Definition: RInterface.hxx:1555
ROOT::RDF::RInterface::Min
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
Definition: RInterface.hxx:1633
TH3.h
ROOT::RDF::RInterface::CheckAndFillDSColumns
RDFInternal::RBookedCustomColumns CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence< S... >, TTraits::TypeList< ColumnTypes... >)
Definition: RInterface.hxx:2468
ROOT::RDF::RInterface::GetDefinedColumnNames
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
Definition: RInterface.hxx:1901
ROOT::RDF::RInterface::Reduce
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:812
ROOT::RDF::RInterface::Histo3D
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
Definition: RInterface.hxx:1216
ROOT::Internal::RDF::RBookedCustomColumns::GetColumns
const RCustomColumnBasePtrMap_t & GetColumns() const
Returns the list of the pointers to the defined columns.
Definition: RBookedCustomColumns.hxx:91
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
Definition: RInterface.hxx:992
ROOT::RDF::RInterface::Cache
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
Definition: RInterface.hxx:652
ULong64_t
unsigned long long ULong64_t
Definition: RtypesCore.h:74
ROOT::IsImplicitMTEnabled
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:556
ROOT::Detail::RDF::RCustomColumnBase
Definition: RCustomColumnBase.hxx:30
ROOT::RDF::RInterface::Aggregate
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
Definition: RInterface.hxx:2027
TGraph
Definition: TGraph.h:41
RtypesCore.h
ROOT::RDF::RInterface::Snapshot
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:478
ROOT::RDF::RInterface::Profile2D
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Definition: RInterface.hxx:1471
ROOT::RDF::RInterface::Book
RResultPtr< typename Helper::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
Definition: RInterface.hxx:2107
ROOT::RDF::RInterface::GetValidatedColumnNames
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,...
Definition: RInterface.hxx:2460
ROOT::Detail::RDF::RLoopManager::AddColumnAlias
void AddColumnAlias(const std::string &alias, const std::string &colName)
Definition: RLoopManager.hxx:165
ROOT::Detail::RDF::RFilter
Definition: RFilter.hxx:60
graph
Definition: graph.py:1
ROOT::RDF::RInterface::Range
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
Definition: RInterface.hxx:712
ROOT::RDF::RInterface::Snapshot
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:460
ROOT::RDF::RInterface::operator=
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
ROOT::Internal::RDF
Definition: RArrowDS.hxx:15
HistoModels.hxx
ROOT::Detail::RDF::RLoopManager::GetAliasMap
const std::map< std::string, std::string > & GetAliasMap() const
Definition: RLoopManager.hxx:166
name
char name[80]
Definition: TGX11.cxx:110
ROOT::RDF::RInterface::Define
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
Definition: RInterface.hxx:294
ROOT::RDF::RInterface::Aggregate
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
Definition: RInterface.hxx:2064
ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:52
ROOT::RDF::RInterface::CreateAction
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
Definition: RInterface.hxx:2266
ROOT::RDF::RInterface::Alias
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allows referring to a column by a different name.
Definition: RInterface.hxx:402
ROOT::Detail::RDF::RLoopManager::GetNRuns
unsigned int GetNRuns() const
Definition: RLoopManager.hxx:168
ROOT::Internal::RDF::UpcastNode
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
Definition: RDFInterfaceUtils.cxx:686
ROOT::RDF::RInterface::GetColumnNames
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
Definition: RInterface.hxx:1820
ROOT::Internal::RDF::GetFilterNames
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
Definition: RDFInterfaceUtils.cxx:525
make_cnn_model.model
model
Definition: make_cnn_model.py:6
ROOT::RDF::RInterface::CacheImpl
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... > s)
Implementation of cache.
Definition: RInterface.hxx:2426
ROOT::Internal::RDF::AtLeastOneEmptyString
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
Definition: RDFInterfaceUtils.cxx:677
ROOT::RDF::RInterface::RInterface
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RBookedCustomColumns &columns, RDataSource *ds)
Definition: RInterface.hxx:2448
ROOT::RDF::RInterface::Stats
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action)
Definition: RInterface.hxx:1587
ROOT::RDF::RInterface::Fill
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action)
Definition: RInterface.hxx:1501
type
int type
Definition: TGX11.cxx:121
TStatistic
Statistical variable, defined by its mean and variance (RMS). Named, streamable, storable and mergeab...
Definition: TStatistic.h:33
ROOT::RDF::RInterface::Profile1D
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
Definition: RInterface.hxx:1354
ROOT::Internal::RDF::RBookedCustomColumns::GetNames
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns.
Definition: RBookedCustomColumns.hxx:87
ROOT::Internal::RDF::RBookedCustomColumns::HasName
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
Definition: RDFBookedCustomColumns.cxx:7
ROOT::RDF::RInterface::GetNSlots
unsigned int GetNSlots() const
Gets the number of data processing slots.
Definition: RInterface.hxx:1959
ROOT::Internal::RDF::GetBranchNames
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
Definition: RLoopManager.cxx:211
ROOT::Internal::RDF::TypeID2TypeName
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: RDFUtils.cxx:84
ROOT::Detail::RDF::RLoopManager::GetNSlots
unsigned int GetNSlots() const
Definition: RLoopManager.hxx:157
TH1.h
ROOT::RDF::RInterface::Foreach
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
Definition: RInterface.hxx:732
ROOT::RDF::RInterface::Cache
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:609
ROOT::RDF::RInterface::Cache
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
Definition: RInterface.hxx:666
ROOT
VSD Structures.
Definition: StringConv.hxx:21
ROOT::RDF::RInterface::DefineImpl
std::enable_if<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view, F, const ColumnNames_t &)
Definition: RInterface.hxx:2363
ROOT::Internal::RDF::ConvertRegexToColumns
ColumnNames_t ConvertRegexToColumns(const RDFInternal::RBookedCustomColumns &customColumns, TTree *tree, ROOT::RDF::RDataSource *dataSource, std::string_view columnNameRegexp, std::string_view callerName)
Definition: RDFInterfaceUtils.cxx:356
RResultPtr.hxx
ROOT::RDF::RDataSource::HasColumn
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
ROOT::RDF::TProfile2DModel
A struct which stores the parameters of a TProfile2D.
Definition: HistoModels.hxx:124
ROOT::RDF::RInterface::RInterface
friend class RInterface
Definition: RInterface.hxx:99
ROOT::RDF::RInterface::Report
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
Definition: RInterface.hxx:1785
ROOT::Detail::RDF::RLoopManager
The head node of a RDF computation graph.
Definition: RLoopManager.hxx:56
ROOT::RDF::RInterface::Profile1D
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
Definition: RInterface.hxx:1310
ROOT::RDF::RInterface::GetColumnType
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
Definition: RInterface.hxx:1860
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:965
TClassEdit::DemangleTypeIdName
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
Definition: TClassEdit.cxx:2100
int
ROOT::RDF::TH1DModel
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
ROOT::RDF::RInterface::HasColumn
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
Definition: RInterface.hxx:1929
ROOT::Internal::RDF::RBookedCustomColumns::AddName
void AddName(std::string_view name)
Internally it recreates the map with the new column name, and swaps with the old one.
Definition: RDFBookedCustomColumns.cxx:21