Logo ROOT  
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RDF_TINTERFACE
12 #define ROOT_RDF_TINTERFACE
13 
14 #include "ROOT/RDataSource.hxx"
17 #include "ROOT/RDF/HistoModels.hxx"
19 #include "ROOT/RDF/RRange.hxx"
20 #include "ROOT/RDF/Utils.hxx"
22 #include "ROOT/RDF/RLazyDSImpl.hxx"
23 #include "ROOT/RResultPtr.hxx"
25 #include "ROOT/RStringView.hxx"
26 #include "ROOT/TypeTraits.hxx"
27 #include "ROOT/InternalTreeUtils.hxx" // for GetFileNamesFromTree and GetFriendInfo
28 #include "RtypesCore.h" // for ULong64_t
29 #include "TDirectory.h"
30 #include "TH1.h" // For Histo actions
31 #include "TH2.h" // For Histo actions
32 #include "TH3.h" // For Histo actions
33 #include "TProfile.h"
34 #include "TProfile2D.h"
35 #include "TStatistic.h"
36 #include "TChain.h" // for checking fLoopManager->GetTree() return type
37 
38 #include <algorithm>
39 #include <cstddef>
40 #include <initializer_list>
41 #include <iterator> // std::back_inserter
42 #include <limits>
43 #include <memory>
44 #include <sstream>
45 #include <stdexcept>
46 #include <string>
47 #include <type_traits> // is_same, enable_if
48 #include <typeinfo>
49 #include <vector>
50 #include <set>
51 
52 class TGraph;
53 
54 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
55 namespace ROOT {
   // Forward declarations only: none of these names is defined in the visible part of this header.
56 void DisableImplicitMT();
57 bool IsImplicitMTEnabled();
58 void EnableImplicitMT(UInt_t numthreads);
59 class RDataFrame; // needed for the cling::printValue(ROOT::RDataFrame *) declaration below
60 namespace Internal {
61 namespace RDF {
   // NOTE(review): the friend declaration inside RInterface names RDFInternal::GraphDrawing::GraphCreatorHelper,
   // i.e. a class inside a nested GraphDrawing namespace. This line declares a different name
   // (ROOT::Internal::RDF::GraphCreatorHelper) -- confirm whether it is still needed.
62 class GraphCreatorHelper;
63 } // namespace RDF
64 } // namespace Internal
65 } // namespace ROOT
66 namespace cling {
   // Declared before RInterface so that the class can name this function in a friend declaration.
67 std::string printValue(ROOT::RDataFrame *tdf);
68 } // namespace cling
69 
70 namespace ROOT {
71 namespace RDF {
   // Short aliases used throughout the class below.
72 namespace RDFDetail = ROOT::Detail::RDF;
   // NOTE(review): listing line 73 is absent from this extract. The RDFInternal name used throughout the class
   // is not declared in the visible text; presumably it is an alias introduced on that line -- confirm.
74 namespace TTraits = ROOT::TypeTraits;
75 
   // Forward declaration, so that RNode can be named before the class template is defined.
76 template <typename Proxied, typename DataSource>
77 class RInterface;
78 
   // Common node type: an RInterface over the RNodeBase type with no data-source type.
79 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
80 
81 // clang-format off
82 /**
83  * \class ROOT::RDF::RInterface
84  * \ingroup dataframe
85  * \brief The public interface to the RDataFrame federation of classes.
86  * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
87  * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
88  *
89  * The documentation of each method features a one liner illustrating how to use the method, for example showing how
90  * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
91  */
92 // clang-format on
93 template <typename Proxied, typename DataSource = void>
94 class RInterface {
95  using DS_t = DataSource;
    // NOTE(review): listing lines 96-99 are absent from this extract. Names used unqualified throughout the
    // class (e.g. ColumnNames_t, RLoopManager) are not declared in the visible text; presumably they are
    // introduced on those lines -- confirm against the full header.
100  friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
101  friend class RDFInternal::GraphDrawing::GraphCreatorHelper;
102 
    // Grants every RInterface instantiation access to the private members of every other instantiation.
103  template <typename T, typename W>
104  friend class RInterface;
105 
106  std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
    // NOTE(review): the three doc comments below have lost the member declarations they belong to (listing
    // lines 108, 110 and 113 are absent). Judging by the names used in the rest of the class they presumably
    // document fLoopManager, fDataSource and fDefines respectively -- confirm against the full header.
107  ///< The RLoopManager at the root of this computation graph. Never null.
109  /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
111 
112  /// Contains the custom columns defined up to this node.
114 
115 public:
116  ////////////////////////////////////////////////////////////////////////////
117  /// \brief Copy-assignment operator for RInterface.
     /// Defaulted, i.e. member-wise copy assignment.
     /// NOTE(review): no move-assignment operator is declared next to the other special members. Because the
     /// copy operations and the move constructor are user-declared, the compiler does not generate one
     /// implicitly, so assigning from an rvalue falls back to this copy assignment -- confirm this is intended
     /// (and that none is declared elsewhere in the class).
118  RInterface &operator=(const RInterface &) = default;
119 
120  ////////////////////////////////////////////////////////////////////////////
121  /// \brief Copy-ctor for RInterface.
     /// Defaulted, i.e. member-wise copy: the copy's fProxiedPtr shares ownership of the same node object.
122  RInterface(const RInterface &) = default;
123 
124  ////////////////////////////////////////////////////////////////////////////
125  /// \brief Move-ctor for RInterface.
     /// Defaulted, i.e. member-wise move.
126  RInterface(RInterface &&) = default;
127 
128  ////////////////////////////////////////////////////////////////////////////
129  /// \brief Only enabled when building a RInterface<RLoopManager>.
130  template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0>
131  RInterface(const std::shared_ptr<Proxied> &proxied)
132  : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource())
133  {
     // The defaulted template arguments remove this constructor from overload resolution unless Proxied is
     // RLoopManager; in that case the proxied node itself is the loop manager, hence proxied.get() above.
     // `proxied` is dereferenced unconditionally in the initializer list, so it must not be null.
     // NOTE(review): listing line 134, the single line of this body, is absent from this extract.
     // NOTE(review): this single-argument constructor is not `explicit` -- confirm that the implicit
     // conversion from std::shared_ptr<Proxied> is wanted.
135  }
136 
137  ////////////////////////////////////////////////////////////////////////////
138  /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
139  /// Different RDataFrame methods return different C++ types. All nodes, however,
140  /// can be cast to this common type at the cost of a small performance penalty.
141  /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
142  /// around via (non-template, C++11) helper functions.
143  /// Example usage:
144  /// ~~~{.cpp}
145  /// // a function that conditionally adds a Range to a RDataFrame node.
146  /// RNode MaybeAddRange(RNode df, bool mustAddRange)
147  /// {
148  /// return mustAddRange ? df.Range(1) : df;
149  /// }
150  /// // use as :
151  /// ROOT::RDataFrame df(10);
152  /// auto maybeRanged = MaybeAddRange(df, true);
153  /// ~~~
154  /// Note that it is not a problem to pass RNode's by value.
155  operator RNode() const
156  {
     // Cast the node pointer to a shared_ptr<RNodeBase> (sharing ownership of the same node object) and
     // pass the loop manager, the defines and the data source along unchanged.
157  return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fDefines,
158  fDataSource);
159  }
160 
161  ////////////////////////////////////////////////////////////////////////////
162  /// \brief Append a filter to the call graph.
163  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
164  /// signalling whether the event has passed the selection (true) or not (false).
165  /// \param[in] columns Names of the columns/branches in input to the filter function.
166  /// \param[in] name Optional name of this filter. See `Report`.
167  /// \return the filter node of the computation graph.
168  ///
169  /// Append a filter node at the point of the call graph corresponding to the
170  /// object this method is called on.
171  /// The callable `f` should not have side-effects (e.g. modification of an
172  /// external or static variable) to ensure correct results when implicit
173  /// multi-threading is active.
174  ///
175  /// RDataFrame only evaluates filters when necessary: if multiple filters
176  /// are chained one after another, they are executed in order and the first
177  /// one returning false causes the event to be discarded.
178  /// Even if multiple actions or transformations depend on the same filter,
179  /// it is executed once per entry. If its result is requested more than
180  /// once, the cached result is served.
181  ///
182  /// ### Example usage:
183  /// ~~~{.cpp}
184  /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
185  /// auto filtered = df.Filter(myCut, {"x", "y"});
186  ///
187  /// // String: it must contain valid C++ except that column names can be used instead of variable names
188  /// auto filtered = df.Filter("x*y > 0");
189  /// ~~~
190  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
192  Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
193  {
     // NOTE(review): listing line 191 (between the template header and the function name, presumably the
     // return type) is absent from this extract; the return statement below produces an
     // RInterface<RDFDetail::RFilter<F, Proxied>, DS_t>.
194  RDFInternal::CheckFilter(f);
     // The parameter list of the callable determines how many column names are requested.
195  using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
196  constexpr auto nColumns = ColTypes_t::list_size;
197  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
198  CheckAndFillDSColumns(validColumnNames, ColTypes_t());
199 
200  using F_t = RDFDetail::RFilter<F, Proxied>;
201 
     // The new filter node is constructed from the callable, the validated column names, the node wrapped
     // by this interface, the current defines and the filter name.
202  auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fDefines, name);
     // The loop manager is handed a raw pointer only; the shared_ptr itself is moved into the returned
     // interface.
203  fLoopManager->Book(filterPtr.get());
204  return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fDefines, fDataSource);
205  }
206 
207  ////////////////////////////////////////////////////////////////////////////
208  /// \brief Append a filter to the call graph.
209  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
210  /// signalling whether the event has passed the selection (true) or not (false).
211  /// \param[in] name Optional name of this filter. See `Report`.
212  /// \return the filter node of the computation graph.
213  ///
214  /// Refer to the first overload of this method for the full documentation.
215  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
217  {
     // NOTE(review): the signature (listing line 216) is absent from this extract; the body uses the
     // parameters `f` and `name`.
218  // SFINAE removes this template when F is convertible to std::string, so that a call passing two strings
219  // selects the overload that takes an expression and a name rather than this template method.
     // Forwards to the overload taking column names, passing an empty column list.
220  return Filter(f, {}, name);
221  }
222 
223  ////////////////////////////////////////////////////////////////////////////
224  /// \brief Append a filter to the call graph.
225  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
226  /// signalling whether the event has passed the selection (true) or not (false).
227  /// \param[in] columns Names of the columns/branches in input to the filter function.
228  /// \return the filter node of the computation graph.
229  ///
230  /// Refer to the first overload of this method for the full documentation.
231  template <typename F>
232  RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
233  {
     // Convert the braced list to ColumnNames_t and forward to the overload taking column names.
234  return Filter(f, ColumnNames_t{columns});
235  }
236 
237  ////////////////////////////////////////////////////////////////////////////
238  /// \brief Append a filter to the call graph.
239  /// \param[in] expression The filter expression in C++
240  /// \param[in] name Optional name of this filter. See `Report`.
241  /// \return the filter node of the computation graph.
242  ///
243  /// The expression is just-in-time compiled and used to filter entries. It must
244  /// be valid C++ syntax in which variable names are substituted with the names
245  /// of branches/columns.
246  ///
247  /// ### Example usage:
248  /// ~~~{.cpp}
249  /// auto filtered_df = df.Filter("myCollection.size() > 3");
250  /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
251  /// ~~~
253  {
     // NOTE(review): several lines of this method are absent from this extract: the signature (listing
     // line 252), the remaining arguments of the BookFilterJit call (261) and line 264 (presumably the
     // return statement). The body uses the parameters `expression` and `name`.
254  // upcastNodeOnHeap is not released in this function: it is deleted by the jitted call to JitFilterHelper
255  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
     // upcastNodeOnHeap is a pointer to a smart pointer: strip the pointer, then take its element_type.
256  using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
     // NOTE(review): upcastInterface is not referenced by any of the visible lines -- confirm whether it is
     // still needed.
257  RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fDefines, fDataSource);
258  const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
259 
260  RDFInternal::BookFilterJit(jittedFilter, upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
262 
     // As in the non-jitted overload, the loop manager is handed a raw pointer to the new filter node.
263  fLoopManager->Book(jittedFilter.get());
265  }
266 
267  // clang-format off
268  ////////////////////////////////////////////////////////////////////////////
269  /// \brief Creates a custom column.
270  /// \param[in] name The name of the custom column.
271  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the custom column.
272  /// \param[in] columns Names of the columns/branches in input to the producer function.
273  /// \return the first node of the computation graph for which the new quantity is defined.
274  ///
275  /// Create a custom column that will be visible from all subsequent nodes
276  /// of the functional chain. The `expression` is only evaluated for entries that pass
277  /// all the preceding filters.
278  /// A new variable is created called `name`, accessible as if it was contained
279  /// in the dataset from subsequent transformations/actions.
280  ///
281  /// Use cases include:
282  /// * caching the results of complex calculations for easy and efficient multiple access
283  /// * extraction of quantities of interest from complex objects
284  ///
285  /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
286  ///
287  /// ### Example usage:
288  /// ~~~{.cpp}
289  /// // assuming a function with signature:
290  /// double myComplexCalculation(const RVec<float> &muon_pts);
291  /// // we can pass it directly to Define
292  /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
293  /// // alternatively, we can pass the body of the function as a string, as in Filter:
294  /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
295  /// ~~~
296  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
298  {
     // NOTE(review): the signature (listing line 297) is absent from this extract; the body uses the
     // parameters `name`, `expression` and `columns`.
     // Thin wrapper over DefineImpl. CustomColExtraArgs::None presumably means that the callable receives
     // no slot/entry argument (compare DefineSlot and DefineSlotEntry); the trailing string names the
     // calling method.
299  return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Define");
300  }
301  // clang-format on
302 
303  // clang-format off
304  ////////////////////////////////////////////////////////////////////////////
305  /// \brief Creates a custom column with a value dependent on the processing slot.
306  /// \param[in] name The name of the custom column.
307  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the custom column.
308  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
309  /// \return the first node of the computation graph for which the new quantity is defined.
310  ///
311  /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
312  /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
313  /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
314  /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
315  /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
316  ///
317  /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
318  /// ~~~{.cpp}
319  /// int function(unsigned int, double, double);
320  /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
321  /// df.DefineSlot("x", function, {"column1", "column2"})
322  /// ~~~
323  ///
324  /// See Define for more information.
325  template <typename F>
327  {
     // NOTE(review): the signature (listing line 326) is absent from this extract; the body uses the
     // parameters `name`, `expression` and `columns`.
     // Thin wrapper over DefineImpl, selecting the variant in which the callable takes the slot number as
     // its first argument; the trailing string names the calling method.
328  return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "DefineSlot");
329  }
330  // clang-format on
331 
332  // clang-format off
333  ////////////////////////////////////////////////////////////////////////////
334  /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
335  /// \param[in] name The name of the custom column.
336  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the custom column.
337  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
338  /// \return the first node of the computation graph for which the new quantity is defined.
339  ///
340  /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
341  /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
342  /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
343  /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
344  /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
345  /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
346  ///
347  /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
348  /// ~~~{.cpp}
349  /// int function(unsigned int, ULong64_t, double, double);
350  /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
351  /// DefineSlotEntry("x", function, {"column1", "column2"})
352  /// ~~~
353  ///
354  /// See Define for more information.
355  template <typename F>
357  {
     // NOTE(review): the signature (listing line 356) is absent from this extract; the body uses the
     // parameters `name`, `expression` and `columns`.
     // Thin wrapper over DefineImpl, selecting the variant in which the callable takes the slot number and
     // the entry number as its first two arguments; the trailing string names the calling method.
358  return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
359  "DefineSlotEntry");
360  }
361  // clang-format on
362 
363  ////////////////////////////////////////////////////////////////////////////
364  /// \brief Creates a custom column.
365  /// \param[in] name The name of the custom column.
366  /// \param[in] expression An expression in C++ which represents the defined value
367  /// \return the first node of the computation graph for which the new quantity is defined.
368  ///
369  /// The expression is just-in-time compiled and used to produce the column entries.
370  /// It must be valid C++ syntax in which variable names are substituted with the names
371  /// of branches/columns.
372  ///
373  /// Refer to the first overload of this method for the full documentation.
375  {
     // NOTE(review): several lines of this method are absent from this extract: the signature (listing
     // line 374), the checks announced by the comment below (377, 379-381) and the declaration of
     // `newCols` (387). `where` is not used by any visible line; presumably it is passed to those checks.
376  constexpr auto where = "Define";
378  // these checks must be done before jitting lest we throw exceptions in jitted code
382 
383  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
384  auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fDefines,
385  fLoopManager->GetBranchNames(), upcastNodeOnHeap);
386 
     // Register the jitted column under `name` in the column collection handed to the returned interface.
388  newCols.AddColumn(jittedDefine, name);
389 
     // The returned interface wraps the same node (fProxiedPtr) as this one; only the column collection
     // passed to it differs.
390  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
391 
392  return newInterface;
393  }
394 
395  ////////////////////////////////////////////////////////////////////////////
396  /// \brief Creates a custom column, overriding an existing one with the same name.
397  /// \param[in] name The name of the custom column.
398  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the custom column.
399  /// \param[in] columns Names of the columns/branches in input to the producer function.
400  /// \return the first node of the computation graph for which the new quantity is defined.
401  ///
402  /// An exception is thrown in case the column to re-define does not already exist.
403  /// See Define() for more information.
404  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
406  {
     // NOTE(review): the signature (listing line 405) is absent from this extract; the body uses the
     // parameters `name`, `expression` and `columns`.
     // Same helper and template arguments as Define; only the trailing string ("Redefine") differs.
407  return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Redefine");
408  }
409 
410  // clang-format off
411  ////////////////////////////////////////////////////////////////////////////
412  /// \brief Creates a custom column, overriding an existing one with the same name.
413  /// \param[in] name The name of the custom column.
414  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the custom column.
415  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
416  /// \return the first node of the computation graph for which the new quantity is defined.
417  ///
418  /// An exception is thrown in case the column to re-define does not already exist.
419  ///
420  /// See DefineSlot() for more information.
421  // clang-format on
422  template <typename F>
424  {
     // NOTE(review): the signature (listing line 423) is absent from this extract; the body uses the
     // parameters `name`, `expression` and `columns`.
     // Same helper and template arguments as DefineSlot; only the trailing string ("RedefineSlot") differs.
425  return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "RedefineSlot");
426  }
427 
428  // clang-format off
429  ////////////////////////////////////////////////////////////////////////////
430  /// \brief Creates a custom column, overriding an existing one with the same name.
431  /// \param[in] name The name of the custom column.
432  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the custom column.
433  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
434  /// \return the first node of the computation graph for which the new quantity is defined.
435  ///
436  /// An exception is thrown in case the column to re-define does not already exist.
437  ///
438  /// See DefineSlotEntry() for more information.
439  // clang-format on
440  template <typename F>
442  {
     // NOTE(review): the signature (listing line 441) is absent from this extract; the body uses the
     // parameters `name`, `expression` and `columns`.
     // Same helper and template arguments as DefineSlotEntry; only the trailing string
     // ("RedefineSlotEntry") differs.
443  return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
444  "RedefineSlotEntry");
445  }
446 
447  ////////////////////////////////////////////////////////////////////////////
448  /// \brief Creates a custom column, overriding an existing one with the same name.
449  /// \param[in] name The name of the custom column.
450  /// \param[in] expression An expression in C++ which represents the defined value
451  /// \return the first node of the computation graph for which the new quantity is defined.
452  ///
453  /// The expression is just-in-time compiled and used to produce the column entries.
454  /// It must be valid C++ syntax in which variable names are substituted with the names
455  /// of branches/columns.
456  ///
457  /// An exception is thrown in case the column to re-define does not already exist.
458  ///
459  /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
461  {
     // NOTE(review): several lines of this method are absent from this extract: the signature (listing
     // line 460), lines 463-466 (presumably the pre-jitting checks, as in the string overload of Define)
     // and the declaration of `newCols` (472). `where` is not used by any visible line.
462  constexpr auto where = "Redefine";
467 
468  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
469  auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fDefines,
470  fLoopManager->GetBranchNames(), upcastNodeOnHeap);
471 
     // Register the jitted column under `name` in the column collection handed to the returned interface.
473  newCols.AddColumn(jittedDefine, name);
474 
     // The returned interface wraps the same node (fProxiedPtr) as this one; only the column collection
     // passed to it differs.
475  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
476 
477  return newInterface;
478  }
479 
480  ////////////////////////////////////////////////////////////////////////////
481  /// \brief Allow to refer to a column with a different name.
482  /// \param[in] alias name of the column alias
483  /// \param[in] columnName of the column to be aliased
484  /// \return the first node of the computation graph for which the alias is available.
485  ///
486  /// Aliasing an alias is supported.
487  ///
488  /// ### Example usage:
489  /// ~~~{.cpp}
490  /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
491  /// ~~~
493  {
     // NOTE(review): several lines of this method are absent from this extract: the signature (listing
     // line 492), the start of the call whose last arguments appear on line 505 (504) and the declaration
     // of `newCols` (511). The body uses the parameters `alias` and `columnName`.
494  // The symmetry with Define is clear. We want to:
495  // - Create globally the alias and return this very node, unchanged
496  // - Make aliases accessible based on chains and not globally
497 
498  // Column names provided by the data source, or an empty list when there is no data source
499  auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
500 
501  constexpr auto where = "Alias";
502  RDFInternal::CheckValidCppVarName(alias, where);
503  // If the alias name is a column name, there is a problem
505  fLoopManager->GetBranchNames(), dsColumnNames);
506 
     // Validate the single target column name and take the (only) validated result.
507  const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
508 
     // The alias is recorded twice: on the loop manager (here) and, below, as a name in the column
     // collection handed to the returned interface.
509  fLoopManager->AddColumnAlias(std::string(alias), validColumnName);
510 
512 
513  newCols.AddName(alias);
     // As for Define, the returned interface wraps the same node (fProxiedPtr) as this one.
514  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
515 
516  return newInterface;
517  }
518 
519  ////////////////////////////////////////////////////////////////////////////
520  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
521  /// \tparam ColumnTypes variadic list of branch/column types.
522  /// \param[in] treename The name of the output TTree.
523  /// \param[in] filename The name of the output TFile.
524  /// \param[in] columnList The list of names of the columns/branches to be written.
525  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
526  /// \return a `RDataFrame` that wraps the snapshotted dataset.
527  ///
528  /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
529  /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
530  /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
531  /// written out and it appears before the array in the columnList.
532  ///
533  /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
534  /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
535  /// present, by default all friend top-level branches that have names that do not collide with
536  /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
537  /// friend branches with the same names of branches in the main TTree/TChain with names of the form
538  /// '<friendname>_<branchname>' in order to differentiate them from the branches in the main tree/chain.
539  ///
540  /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
541  /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
542  /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
543  /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
544  /// error out if such a "shuffled" TTree is used in a friendship.
545  ///
546  /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
547  /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
548  /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
549  ///
550  /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
551  /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
552  /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
553  /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
554  ///
555  /// ### Example invocations:
556  ///
557  /// ~~~{.cpp}
558  /// // without specifying template parameters (column types automatically deduced)
559  /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
560  ///
561  /// // specifying template parameters ("x" is `int`, "y" is `float`)
562  /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
563  /// ~~~
564  ///
565  /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
566  /// `RSnapshotOptions`:
567  /// ~~~{.cpp}
568  /// RSnapshotOptions opts;
569  /// opts.fLazy = true;
570  /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
571  /// ~~~
572  template <typename... ColumnTypes>
574  Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
575  const RSnapshotOptions &options = RSnapshotOptions())
576  {
     // NOTE(review): listing line 573 (between the template header and the function name, presumably the
     // return type) is absent from this extract.
     // Forwards all arguments, together with the explicitly specified column types, to SnapshotImpl.
577  return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
578  }
579 
580  ////////////////////////////////////////////////////////////////////////////
581  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
582  /// \param[in] treename The name of the output TTree.
583  /// \param[in] filename The name of the output TFile.
584  /// \param[in] columnList The list of names of the columns/branches to be written.
585  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
586  /// \return a `RDataFrame` that wraps the snapshotted dataset.
587  ///
588  /// This function returns a `RDataFrame` built with the output tree as a source.
589  /// The types of the columns are automatically inferred and do not need to be specified.
590  ///
591  /// See above for a more complete description and example usages.
593  const ColumnNames_t &columnList,
594  const RSnapshotOptions &options = RSnapshotOptions())
595  {
     // NOTE(review): the first line of the signature (listing line 592) is absent from this extract, as are
     // listing lines 598 and 608 inside the body. The body also uses the parameters `treename` and
     // `filename`.
596  const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
597  const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
599 
     // Keep the full tree path for the RDataFrame built below; `treename` is then narrowed to the tree-name
     // component of the parsed path and the directory component is kept separately.
600  const auto fullTreeName = treename;
601  const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
602  treename = parsedTreePath.fTreeName;
603  const auto &dirname = parsedTreePath.fDirName;
604 
     // The helper arguments own copies of the strings (std::string is constructed from each view).
605  auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
606  std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
607 
     // Data frame constructed from the full tree path, the output file name and the validated columns;
     // it is passed to the action below.
609  auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
610 
611  auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
612  validCols, newRDF, snapHelperArgs, validCols.size());
613 
     // Unless a lazy snapshot was requested, dereference the result right away; per the Snapshot
     // documentation, setting fLazy is what avoids triggering the event loop at this point.
614  if (!options.fLazy)
615  *resPtr;
616  return resPtr;
617  }
618 
619  // clang-format off
620  ////////////////////////////////////////////////////////////////////////////
621  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
622  /// \param[in] treename The name of the output TTree.
623  /// \param[in] filename The name of the output TFile.
624  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
625  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
626  /// \return a `RDataFrame` that wraps the snapshotted dataset.
627  ///
628  /// This function returns a `RDataFrame` built with the output tree as a source.
629  /// The types of the columns are automatically inferred and do not need to be specified.
630  ///
631  /// See above for a more complete description and example usages.
633  std::string_view columnNameRegexp = "",
634  const RSnapshotOptions &options = RSnapshotOptions())
635  {
     // NOTE(review): the first line of the signature (listing line 632) is absent from this extract; the
     // body also uses the parameters `treename` and `filename`.
     // Candidate columns are gathered from three places: the defined columns, the top-level branches of the
     // tree (if there is one) and the columns of the data source (if there is one).
636  const auto definedColumns = fDefines.GetNames();
637  auto *tree = fLoopManager->GetTree();
638  const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
639  const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
640  // Ignore __rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
641  ColumnNames_t dsColumnsWithoutSizeColumns;
     // 13 is the length of the "__rdf_sizeof_" prefix: names that are shorter, or that do not start with
     // it, are kept.
642  std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
643  [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "__rdf_sizeof_"; });
     // Concatenate the three lists in the order: defined columns, tree branches, data-source columns.
644  ColumnNames_t columnNames;
645  columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
646  columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
647  columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
648  columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
     // Select the candidates matching the regular expression, then delegate to the overload taking an
     // explicit column list.
649  const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
650  return Snapshot(treename, filename, selectedColumns, options);
651  }
652  // clang-format on
653 
654  // clang-format off
655  ////////////////////////////////////////////////////////////////////////////
656  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
657  /// \param[in] treename The name of the output TTree.
658  /// \param[in] filename The name of the output TFile.
659  /// \param[in] columnList The list of names of the columns/branches to be written, given as a braced list.
660  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
661  /// \return a `RDataFrame` that wraps the snapshotted dataset.
662  ///
663  /// This function returns a `RDataFrame` built with the output tree as a source.
664  /// The types of the columns are automatically inferred and do not need to be specified.
665  ///
     /// Convenience overload: the braced list is copied into a `ColumnNames_t` and forwarded to another
     /// Snapshot overload together with the remaining arguments.
     ///
666  /// See above for a more complete description and example usages.
     // NOTE(review): the line carrying the function name and the `treename`/`filename` parameters is not
     // present in this listing; confirm the full declaration against the original header.
668  std::initializer_list<std::string> columnList,
669  const RSnapshotOptions &options = RSnapshotOptions())
670  {
     // Materialise the initializer list as a ColumnNames_t before delegating.
671  ColumnNames_t selectedColumns(columnList);
672  return Snapshot(treename, filename, selectedColumns, options);
673  }
674  // clang-format on
675 
676  ////////////////////////////////////////////////////////////////////////////
677  /// \brief Save selected columns in memory.
678  /// \tparam ColumnTypes variadic list of branch/column types (see the example below: one type per column name).
679  /// \param[in] columnList columns to be cached in memory.
680  /// \return a `RDataFrame` that wraps the cached dataset.
681  ///
682  /// This action returns a new `RDataFrame` object, completely detached from
683  /// the originating `RDataFrame`. The new dataframe only contains the cached
684  /// columns and stores their content in memory for fast, zero-copy subsequent access.
685  ///
686  /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
687  /// fits in memory and that will be accessed many times.
688  ///
689  /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
690  /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
691  /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
692  /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
693  ///
694  /// ### Example usage:
695  ///
696  /// **Types and columns specified:**
697  /// ~~~{.cpp}
698  /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
699  /// ~~~
700  ///
701  /// **Types inferred and columns specified (this invocation relies on jitting):**
702  /// ~~~{.cpp}
703  /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
704  /// ~~~
705  ///
706  /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
707  /// ~~~{.cpp}
708  /// auto cache_all_cols_df = df.Cache(myRegexp);
709  /// ~~~
710  template <typename... ColumnTypes>
     // NOTE(review): the declaration line (return type, name and the `columnList` parameter) is not present
     // in this listing; confirm it against the original header.
712  {
     // Compile-time index sequence with one index per entry of ColumnTypes, handed to CacheImpl
     // together with the column names.
713  auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
714  return CacheImpl<ColumnTypes...>(columnList, staticSeq);
715  }
716 
717  ////////////////////////////////////////////////////////////////////////////
718  /// \brief Save selected columns in memory.
719  /// \param[in] columnList columns to be cached in memory.
720  /// \return a `RDataFrame` that wraps the cached dataset.
721  ///
     /// The column types are not spelled out by the caller: their names are obtained from
     /// `GetValidatedArgTypes` and spliced into the text of a `Cache<...>` call, which is then handed
     /// to the loop manager for just-in-time compilation.
     ///
722  /// See the previous overloads for more information.
     // NOTE(review): the declaration line (return type, name and the `columnList` parameter) is not present
     // in this listing; confirm it against the original header.
724  {
725  // Early return: if the list of columns is empty, just return an RDF built from a loop manager that is
726  // constructed with the entry count obtained from Count(). If we proceeded, the jitted call would not compile!
727  if (columnList.empty()) {
728  auto nEntries = *this->Count();
729  RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
730  return emptyRDF;
731  }
732 
733  std::stringstream cacheCall;
734  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
     // NOTE(review): the continuation of the following constructor call (its remaining arguments) is not
     // present in this listing; confirm it against the original header.
735  RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
737  // build a string equivalent to
738  // "resRDF = (RInterface<nodetype*>*)(&upcastInterface)->Cache<Ts...>(*(ColumnNames_t*)(&columnListWithoutSizeColumns))"
     // `resRDF` is a placeholder: the generated code assigns the result of the typed Cache call to it
     // through its address.
739  RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
740  cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
741  << RDFInternal::PrettyPrintAddr(&resRDF)
742  << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
743  << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
744 
745  const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
746 
747  const auto validColumnNames =
748  GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
749  const auto colTypes = GetValidatedArgTypes(validColumnNames, fDefines, fLoopManager->GetTree(), fDataSource,
750  "Cache", /*vector2rvec=*/false);
     // Emit the template arguments as "T0, T1, ..., ".
751  for (const auto &colType : colTypes)
752  cacheCall << colType << ", ";
753  if (!columnListWithoutSizeColumns.empty())
754  cacheCall.seekp(-2, cacheCall.cur); // step back over the trailing ", " so that the next write overwrites it
755  cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
756  << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
757 
758  // book the code to jit with the RLoopManager and jit it right away: the generated code refers to
     // the locals above (resRDF, upcastInterface, columnListWithoutSizeColumns) by address, so it has to
     // be executed before this function returns.
759  fLoopManager->ToJitExec(cacheCall.str());
760  fLoopManager->Jit();
761 
762  return resRDF;
763  }
764 
765  ////////////////////////////////////////////////////////////////////////////
766  /// \brief Save selected columns in memory.
767  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
768  /// \return a `RDataFrame` that wraps the cached dataset.
769  ///
770  /// The existing columns are matched against the regular expression. If the string provided
771  /// is empty, all columns are selected. See the previous overloads for more information.
773  {
774  const auto definedColumns = fDefines.GetNames();
775  auto *tree = fLoopManager->GetTree();
776  const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
777  const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
778  // Ignore __rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
779  ColumnNames_t dsColumnsWithoutSizeColumns;
780  std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
781  [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "__rdf_sizeof_"; });
782  ColumnNames_t columnNames;
783  columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
784  columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
785  columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
786  columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
787  const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
788  return Cache(selectedColumns);
789  }
790 
791  ////////////////////////////////////////////////////////////////////////////
792  /// \brief Save selected columns in memory.
793  /// \param[in] columnList columns to be cached in memory.
794  /// \return a `RDataFrame` that wraps the cached dataset.
795  ///
796  /// See the previous overloads for more information.
797  RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
798  {
799  ColumnNames_t selectedColumns(columnList);
800  return Cache(selectedColumns);
801  }
802 
803  // clang-format off
804  ////////////////////////////////////////////////////////////////////////////
805  /// \brief Creates a node that filters entries based on range: [begin, end).
806  /// \param[in] begin Initial entry number considered for this range.
807  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
808  /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
809  /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
     /// \throws std::runtime_error if `stride` is 0, or if `end` is non-zero and smaller than `begin`.
810  ///
811  /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
812  /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
813  ///
814  /// ### Example usage:
815  /// ~~~{.cpp}
816  /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
817  /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
818  /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
819  /// ~~~
820  // clang-format on
821  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
822  {
823  // check invariants: `end == 0` is the "until the end of the dataset" sentinel, so it is exempt from the ordering check
     // NOTE(review): only `end < begin` is rejected; a non-zero `end == begin` passes the check although
     // the message below says "greater than" - confirm which of the two is intended.
824  if (stride == 0 || (end != 0 && end < begin))
825  throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
     // Presumably raises an error when implicit multi-threading is enabled (see the note in the
     // documentation above) - confirm against the definition of CheckIMTDisabled.
826  CheckIMTDisabled("Range");
827 
     // NOTE(review): the declarations of `Range_t` and of the returned `tdf_r` are not present in this
     // listing; confirm them against the original header.
829  auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
     // The loop manager is handed a non-owning pointer to the new range node.
830  fLoopManager->Book(rangePtr.get());
832  return tdf_r;
833  }
834 
835  // clang-format off
836  ////////////////////////////////////////////////////////////////////////////
837  /// \brief Creates a node that filters entries based on range.
838  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
839  /// \return a node of the computation graph for which the range is defined.
840  ///
841  /// See the other Range overload for a detailed description.
842  // clang-format on
843  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
844 
845  // clang-format off
846  ////////////////////////////////////////////////////////////////////////////
847  /// \brief Execute a user-defined function on each entry (*instant action*).
848  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
849  /// \param[in] columns Names of the columns/branches in input to the user function.
850  ///
851  /// The callable `f` is invoked once per entry. This is an *instant action*:
852  /// upon invocation, an event loop as well as execution of all scheduled actions
853  /// is triggered.
854  /// Users are responsible for the thread-safety of this callable when executing
855  /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
856  ///
857  /// ### Example usage:
858  /// ~~~{.cpp}
859  /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
860  /// ~~~
861  // clang-format on
862  template <typename F>
863  void Foreach(F f, const ColumnNames_t &columns = {})
864  {
865  using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
866  using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
867  ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
868  }
869 
870  // clang-format off
871  ////////////////////////////////////////////////////////////////////////////
872  /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
873  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
874  /// \param[in] columns Names of the columns/branches in input to the user function.
875  ///
876  /// Same as `Foreach`, but the user-defined function takes an extra
877  /// `unsigned int` as its first parameter, the *processing slot index*.
878  /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
879  /// for each thread of execution.
880  /// This is meant as a helper in writing thread-safe `Foreach`
881  /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
882  /// The user-defined processing callable is able to follow different
883  /// *streams of processing* indexed by the first parameter.
884  /// `ForeachSlot` works just as well with single-thread execution: in that
885  /// case `slot` will always be `0`.
886  ///
887  /// ### Example usage:
888  /// ~~~{.cpp}
889  /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
890  /// ~~~
891  // clang-format on
892  template <typename F>
893  void ForeachSlot(F f, const ColumnNames_t &columns = {})
894  {
     // The column types are the argument types of `f` with the first one (the slot index) removed.
895  using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
896  constexpr auto nColumns = ColTypes_t::list_size;
897 
898  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
899  CheckAndFillDSColumns(validColumnNames, ColTypes_t());
900 
901  using Helper_t = RDFInternal::ForeachSlotHelper<F>;
     // NOTE(review): the declaration of `Action_t` is not present in this listing; confirm it against the
     // original header.
903 
904  auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fDefines);
     // The loop manager is handed a non-owning pointer; `action` itself is a local that lives until the
     // end of this function, i.e. past the Run() call below.
905  fLoopManager->Book(action.get());
906 
     // Instant action: the loop is run right away instead of returning a result handle.
907  fLoopManager->Run();
908  }
909 
910  // clang-format off
911  ////////////////////////////////////////////////////////////////////////////
912  /// \brief Execute a user-defined reduce operation on the values of a column.
913  /// \tparam F The type of the reduce callable. Automatically deduced.
914  /// \tparam T The type of the column to apply the reduction to. Automatically deduced (defaults to the return type of `F`).
915  /// \param[in] f A callable with signature `T(T,T)`
916  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
917  /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
918  ///
919  /// A reduction takes two values of a column and merges them into one (e.g.
920  /// by summing them, taking the maximum, etc). This action performs the
921  /// specified reduction operation on all processed column values, returning
922  /// a single value of the same type. The callable f must satisfy the general
923  /// requirements of a *processing function* besides having signature `T(T,T)`
924  /// where `T` is the type of column columnName.
925  ///
926  /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
927  /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
928  /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
929  /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
930  /// overload.
931  ///
932  /// ### Example usage:
933  /// ~~~{.cpp}
934  /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
935  /// ~~~
936  ///
937  /// This action is *lazy*: upon invocation of this method the calculation is
938  /// booked but not executed. Also see RResultPtr.
939  // clang-format on
940  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
     // NOTE(review): the declaration line (return type, name and the `f`/`columnName` parameters) is not
     // present in this listing; confirm it against the original header.
942  {
     // This overload needs to create the initial value itself, hence T must be default-constructible.
943  static_assert(
944  std::is_default_constructible<T>::value,
945  "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
     // Delegate to the three-argument overload, using a value-initialised T as the initial value.
946  return Reduce(std::move(f), columnName, T());
947  }
948 
949  ////////////////////////////////////////////////////////////////////////////
950  /// \brief Execute a user-defined reduce operation on the values of a column.
951  /// \tparam F The type of the reduce callable. Automatically deduced.
952  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
953  /// \param[in] f A callable with signature `T(T,T)`
954  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
955  /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
956  /// \return the reduced quantity wrapped in a RResultPtr.
957  ///
958  /// ### Example usage:
959  /// ~~~{.cpp}
960  /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
961  /// ~~~
962  /// See the description of the first Reduce overload for more information.
963  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
964  RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
965  {
966  return Aggregate(f, f, columnName, redIdentity);
967  }
968 
969  ////////////////////////////////////////////////////////////////////////////
970  /// \brief Return the number of entries processed (*lazy action*).
971  /// \return the number of entries wrapped in a RResultPtr.
972  ///
973  /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
974  /// This action is *lazy*: upon invocation of this method the calculation is
975  /// booked but not executed. Also see RResultPtr.
976  ///
977  /// ### Example usage:
978  /// ~~~{.cpp}
979  /// auto nEntriesAfterCuts = myFilteredDf.Count();
980  /// ~~~
981  ///
     // NOTE(review): the declaration line (return type and name), the declaration of `Action_t` and the
     // continuation of the make_unique call below are not present in this listing; confirm them against
     // the original header.
983  {
984  const auto nSlots = fLoopManager->GetNSlots();
     // Zero-initialised counter; the same shared pointer is given to the helper and to the returned result.
985  auto cSPtr = std::make_shared<ULong64_t>(0);
986  using Helper_t = RDFInternal::CountHelper;
     // No input column is read by this action: an empty list of column names is passed.
988  auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
     // The loop manager is handed a non-owning pointer; ownership of the action moves into the result below.
990  fLoopManager->Book(action.get());
991  return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
992  }
993 
994  ////////////////////////////////////////////////////////////////////////////
995  /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
996  /// \tparam T The type of the column.
997  /// \tparam COLL The type of collection used to store the values (defaults to `std::vector<T>`).
998  /// \param[in] column The name of the column to collect the values of.
999  /// \return the content of the selected column wrapped in a RResultPtr.
1000  ///
1001  /// The collection type to be specified for C-style array columns is `RVec<T>`:
1002  /// in this case the returned collection is a `std::vector<RVec<T>>`.
1003  /// ### Example usage:
1004  /// ~~~{.cpp}
1005  /// // In this case intCol is a std::vector<int>
1006  /// auto intCol = rdf.Take<int>("integerColumn");
1007  /// // Same content as above but in this case taken as a RVec<int>
1008  /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1009  /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1010  /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1011  /// ~~~
1012  /// This action is *lazy*: upon invocation of this method the calculation is
1013  /// booked but not executed. Also see RResultPtr.
1014  template <typename T, typename COLL = std::vector<T>>
      // NOTE(review): the declaration line (return type, name and the `column` parameter) is not present
      // in this listing; confirm it against the original header.
1016  {
      // An empty `column` is turned into an empty list of names, otherwise into a one-element list.
1017  const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1018 
      // Exactly one column is requested from the validation step.
1019  const auto validColumnNames = GetValidatedColumnNames(1, columns);
1020  CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1021 
1022  using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1023  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
      // Output collection; the same shared pointer is given to the helper and to the returned result.
1024  auto valuesPtr = std::make_shared<COLL>();
1025  const auto nSlots = fLoopManager->GetNSlots();
1026 
1027  auto action = std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fDefines);
      // The loop manager is handed a non-owning pointer; ownership of the action moves into the result below.
1028  fLoopManager->Book(action.get());
1029  return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1030  }
1031 
1032  ////////////////////////////////////////////////////////////////////////////
1033  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1034  /// \tparam V The type of the column used to fill the histogram.
1035  /// \param[in] model The returned histogram will be constructed using this as a model.
1036  /// \param[in] vName The name of the column that will fill the histogram.
1037  /// \return the monodimensional histogram wrapped in a RResultPtr.
1038  ///
1039  /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1040  /// is filled with each one of the elements of the container. In case multiple columns of container type
1041  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1042  /// possibly different lengths between events).
1043  /// This action is *lazy*: upon invocation of this method the calculation is
1044  /// booked but not executed. Also see RResultPtr.
1045  ///
1046  /// ### Example usage:
1047  /// ~~~{.cpp}
1048  /// // Deduce column type (this invocation needs jitting internally)
1049  /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1050  /// // Explicit column type
1051  /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1052  /// ~~~
1053  ///
1054  /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1055  /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1056  /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1057  template <typename V = RDFDetail::RInferredType>
1058  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1059  {
1060  const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1061 
1062  const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1063 
1064  std::shared_ptr<::TH1D> h(nullptr);
1065  {
1066  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1067  h = model.GetHistogram();
1068  h->SetDirectory(nullptr);
1069  }
1070 
1071  if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1072  RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1073  return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h);
1074  }
1075 
1076  ////////////////////////////////////////////////////////////////////////////
1077  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1078  /// \tparam V The type of the column used to fill the histogram.
1079  /// \param[in] vName The name of the column that will fill the histogram.
1080  /// \return the monodimensional histogram wrapped in a RResultPtr.
1081  ///
1082  /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1083  /// The "name" and "title" strings are built starting from the input column name:
      /// the name is the column name itself and the title is `"<column>;<column>;count"`.
1084  /// See the description of the first Histo1D() overload for more details.
1085  ///
1086  /// ### Example usage:
1087  /// ~~~{.cpp}
1088  /// // Deduce column type (this invocation needs jitting internally)
1089  /// auto myHist1 = myDf.Histo1D("myColumn");
1090  /// // Explicit column type
1091  /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1092  /// ~~~
1093  template <typename V = RDFDetail::RInferredType>
      // NOTE(review): the declaration line (return type, name and the `vName` parameter) is not present
      // in this listing; confirm it against the original header.
1095  {
1096  const auto h_name = std::string(vName);
      // Three ';'-separated fields, presumably main title, x-axis title and y-axis title - confirm.
1097  const auto h_title = h_name + ";" + h_name + ";count";
      // Delegate to the overload taking a model, built from the name, the title, 128 bins and identical limits.
1098  return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1099  }
1100 
1101  ////////////////////////////////////////////////////////////////////////////
1102  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1103  /// \tparam V The type of the column used to fill the histogram.
1104  /// \tparam W The type of the column used as weights.
1105  /// \param[in] model The returned histogram will be constructed using this as a model.
1106  /// \param[in] vName The name of the column that will fill the histogram.
1107  /// \param[in] wName The name of the column that will provide the weights.
1108  /// \return the monodimensional histogram wrapped in a RResultPtr.
1109  ///
1110  /// See the description of the first Histo1D() overload for more details.
1111  ///
1112  /// ### Example usage:
1113  /// ~~~{.cpp}
1114  /// // Deduce column type (this invocation needs jitting internally)
1115  /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1116  /// // Explicit column type
1117  /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1118  /// ~~~
1119  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
      // NOTE(review): the declaration line (return type, name and the `model`/`vName`/`wName` parameters)
      // is not present in this listing; confirm it against the original header.
1121  {
1122  const std::vector<std::string_view> columnViews = {vName, wName};
      // If either name is empty, no column is passed on at all (empty list) rather than a partial list.
1123  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1124  ? ColumnNames_t()
1125  : ColumnNames_t(columnViews.begin(), columnViews.end());
1126  std::shared_ptr<::TH1D> h(nullptr);
1127  {
      // Messages up to kError are ignored while the histogram is created from the model.
1128  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1129  h = model.GetHistogram();
1130  }
      // NOTE(review): unlike the unweighted Histo1D(model, vName) overload, this one performs no
      // GetValidatedColumnNames call, no SetDirectory(nullptr) and no axis-extension handling - confirm
      // that the difference is intended.
1131  return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h);
1132  }
1133 
1134  ////////////////////////////////////////////////////////////////////////////
1135  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1136  /// \tparam V The type of the column used to fill the histogram.
1137  /// \tparam W The type of the column used as weights.
1138  /// \param[in] vName The name of the column that will fill the histogram.
1139  /// \param[in] wName The name of the column that will provide the weights.
1140  /// \return the monodimensional histogram wrapped in a RResultPtr.
1141  ///
1142  /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1143  /// The "name" and "title" strings are built starting from the input column names:
      /// the name is `"<vName>_weighted_<wName>"` and the title is
      /// `"<vName>, weights: <wName>;<vName>;count * <wName>"`.
1144  /// See the description of the first Histo1D() overload for more details.
1145  ///
1146  /// ### Example usage:
1147  /// ~~~{.cpp}
1148  /// // Deduce column types (this invocation needs jitting internally)
1149  /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1150  /// // Explicit column types
1151  /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1152  /// ~~~
1153  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
      // NOTE(review): the declaration line (return type, name and the `vName`/`wName` parameters) is not
      // present in this listing; confirm it against the original header.
1155  {
1156  // We build name and title based on the value and weight column names
1157  std::string str_vName{vName};
1158  std::string str_wName{wName};
1159  const auto h_name = str_vName + "_weighted_" + str_wName;
1160  const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
      // Delegate to the overload taking a model, built from the name, the title, 128 bins and identical limits.
1161  return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1162  }
1163 
1164  ////////////////////////////////////////////////////////////////////////////
1165  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1166  /// \tparam V The type of the column used to fill the histogram.
1167  /// \tparam W The type of the column used as weights.
1168  /// \param[in] model The returned histogram will be constructed using this as a model.
1169  /// \return the monodimensional histogram wrapped in a RResultPtr.
1170  ///
1171  /// This overload will use the first two default columns as column names.
1172  /// See the description of the first Histo1D() overload for more details.
1173  template <typename V, typename W>
1174  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1175  {
1176  return Histo1D<V, W>(model, "", "");
1177  }
1178 
1179  ////////////////////////////////////////////////////////////////////////////
1180  /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1181  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1182  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1183  /// \param[in] model The returned histogram will be constructed using this as a model.
1184  /// \param[in] v1Name The name of the column that will fill the x axis.
1185  /// \param[in] v2Name The name of the column that will fill the y axis.
1186  /// \return the bidimensional histogram wrapped in a RResultPtr.
      /// \throws std::runtime_error if the histogram built from `model` has no axis limits.
1187  ///
1188  /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1189  /// is filled with each one of the elements of the container. In case multiple columns of container type
1190  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1191  /// possibly different lengths between events).
1192  /// This action is *lazy*: upon invocation of this method the calculation is
1193  /// booked but not executed. Also see RResultPtr.
1194  ///
1195  /// ### Example usage:
1196  /// ~~~{.cpp}
1197  /// // Deduce column types (this invocation needs jitting internally)
1198  /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1199  /// // Explicit column types
1200  /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1201  /// ~~~
1202  ///
1203  ///
1204  /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1205  /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1206  /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1207  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
      // NOTE(review): the declaration line (return type, name and the `model`/`v1Name`/`v2Name` parameters)
      // is not present in this listing; confirm it against the original header.
1209  {
1210  std::shared_ptr<::TH2D> h(nullptr);
1211  {
      // Messages up to kError are ignored while the histogram is created from the model.
1212  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1213  h = model.GetHistogram();
1214  }
      // Unlike the one-dimensional case, histograms without axis limits are rejected here.
1215  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1216  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1217  }
1218  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
      // If either name is empty, no column is passed on at all (empty list) rather than a partial list.
1219  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1220  ? ColumnNames_t()
1221  : ColumnNames_t(columnViews.begin(), columnViews.end());
1222  return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h);
1223  }
1224 
1225  ////////////////////////////////////////////////////////////////////////////
1226  /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1227  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1228  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1229  /// \tparam W The type of the column used for the weights of the histogram.
1230  /// \param[in] model The returned histogram will be constructed using this as a model.
1231  /// \param[in] v1Name The name of the column that will fill the x axis.
1232  /// \param[in] v2Name The name of the column that will fill the y axis.
1233  /// \param[in] wName The name of the column that will provide the weights.
1234  /// \return the bidimensional histogram wrapped in a RResultPtr.
1235  ///
1236  /// This action is *lazy*: upon invocation of this method the calculation is
1237  /// booked but not executed. Also see RResultPtr.
1238  ///
1239  /// ### Example usage:
1240  /// ~~~{.cpp}
1241  /// // Deduce column types (this invocation needs jitting internally)
1242  /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1243  /// // Explicit column types
1244  /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1245  /// ~~~
1246  ///
1247  /// See the documentation of the first Histo2D() overload for more details.
1248  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1249  typename W = RDFDetail::RInferredType>
 // NOTE(review): listing lines 1250-1251 (the function signature) are absent from this extract.
1252  {
1253  std::shared_ptr<::TH2D> h(nullptr); // assigned inside the scoped block below
1254  {
1255  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes messages below kError for this scope only -- confirm
1256  h = model.GetHistogram();
1257  }
1258  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { // models without axis limits are rejected up front
1259  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1260  }
1261  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName}; // the weight column travels last
1262  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // all-or-nothing: one empty name discards all three
1263  ? ColumnNames_t()
1264  : ColumnNames_t(columnViews.begin(), columnViews.end());
1265  return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h); // same action tag as the unweighted overload
1266  }
1267 
1268  template <typename V1, typename V2, typename W>
 // NOTE(review): listing line 1269 (the function signature) is absent from this extract.
 // No defaulted template arguments here: V1, V2 and W must all be spelled out by the caller.
1270  {
1271  return Histo2D<V1, V2, W>(model, "", "", ""); // forwards to the four-argument call with every column name empty
1272  }
1273 
1274  ////////////////////////////////////////////////////////////////////////////
1275  /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1276  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1277  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1278  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1279  /// \param[in] model The returned histogram will be constructed using this as a model.
1280  /// \param[in] v1Name The name of the column that will fill the x axis.
1281  /// \param[in] v2Name The name of the column that will fill the y axis.
1282  /// \param[in] v3Name The name of the column that will fill the z axis.
1283  /// \return the tridimensional histogram wrapped in a RResultPtr.
1284  ///
1285  /// This action is *lazy*: upon invocation of this method the calculation is
1286  /// booked but not executed. Also see RResultPtr.
1287  ///
1288  /// ### Example usage:
1289  /// ~~~{.cpp}
1290  /// // Deduce column types (this invocation needs jitting internally)
1291  /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1292  /// "myValueX", "myValueY", "myValueZ");
1293  /// // Explicit column types
1294  /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1295  /// "myValueX", "myValueY", "myValueZ");
1296  /// ~~~
1297  ///
1298  /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1299  /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1300  /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1301  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1302  typename V3 = RDFDetail::RInferredType>
 // NOTE(review): listing line 1303 (first line of the signature) is absent from this extract.
1304  std::string_view v3Name = "")
1305  {
1306  std::shared_ptr<::TH3D> h(nullptr); // assigned inside the scoped block below
1307  {
1308  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes messages below kError for this scope only -- confirm
1309  h = model.GetHistogram();
1310  }
1311  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { // models without axis limits are rejected up front
1312  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1313  }
1314  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1315  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // all-or-nothing: one empty name discards all three
1316  ? ColumnNames_t()
1317  : ColumnNames_t(columnViews.begin(), columnViews.end());
1318  return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h); // the same shared_ptr is passed twice
1319  }
1320 
1321  ////////////////////////////////////////////////////////////////////////////
1322  /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1323  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1324  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1325  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1326  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1327  /// \param[in] model The returned histogram will be constructed using this as a model.
1328  /// \param[in] v1Name The name of the column that will fill the x axis.
1329  /// \param[in] v2Name The name of the column that will fill the y axis.
1330  /// \param[in] v3Name The name of the column that will fill the z axis.
1331  /// \param[in] wName The name of the column that will provide the weights.
1332  /// \return the tridimensional histogram wrapped in a RResultPtr.
1333  ///
1334  /// This action is *lazy*: upon invocation of this method the calculation is
1335  /// booked but not executed. Also see RResultPtr.
1336  ///
1337  /// ### Example usage:
1338  /// ~~~{.cpp}
1339  /// // Deduce column types (this invocation needs jitting internally)
1340  /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1341  /// "myValueX", "myValueY", "myValueZ", "myWeight");
1342  /// // Explicit column types
1343  /// using d_t = double;
1344  /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1345  /// "myValueX", "myValueY", "myValueZ", "myWeight");
1346  /// ~~~
1347  ///
1348  ///
1349  /// See the documentation of the first Histo3D() overload for more details.
1350  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1351  typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
 // NOTE(review): listing line 1352 (first line of the signature) is absent from this extract.
1353  std::string_view v3Name, std::string_view wName)
1354  {
1355  std::shared_ptr<::TH3D> h(nullptr); // assigned inside the scoped block below
1356  {
1357  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes messages below kError for this scope only -- confirm
1358  h = model.GetHistogram();
1359  }
1360  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { // models without axis limits are rejected up front
1361  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1362  }
1363  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; // the weight column travels last
1364  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // all-or-nothing: one empty name discards all four
1365  ? ColumnNames_t()
1366  : ColumnNames_t(columnViews.begin(), columnViews.end());
1367  return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h); // same action tag as the unweighted overload
1368  }
1369 
1370  template <typename V1, typename V2, typename V3, typename W>
 // NOTE(review): listing line 1371 (the function signature) is absent from this extract.
 // No defaulted template arguments here: all four column types must be spelled out by the caller.
1372  {
1373  return Histo3D<V1, V2, V3, W>(model, "", "", "", ""); // forwards to the five-argument call with every column name empty
1374  }
1375 
1376  ////////////////////////////////////////////////////////////////////////////
1377  /// \brief Fill and return a graph (*lazy action*).
1378  /// \tparam V1 The type of the column used to fill the x axis of the graph.
1379  /// \tparam V2 The type of the column used to fill the y axis of the graph.
1380  /// \param[in] v1Name The name of the column that will fill the x axis.
1381  /// \param[in] v2Name The name of the column that will fill the y axis.
1382  /// \return the graph wrapped in a RResultPtr.
1383  ///
1384  /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1385  /// is filled with each one of the elements of the container.
1386  /// If Multithreading is enabled, the order in which points are inserted is undefined.
1387  /// If the graph has to be drawn, it is recommended to sort its points by x value before drawing.
1388  /// The graph is given a name and a title based on the input column names.
1389  ///
1390  /// This action is *lazy*: upon invocation of this method the calculation is
1391  /// booked but not executed. Also see RResultPtr.
1392  ///
1393  /// ### Example usage:
1394  /// ~~~{.cpp}
1395  /// // Deduce column types (this invocation needs jitting internally)
1396  /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1397  /// // Explicit column types
1398  /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1399  /// ~~~
1400  ///
1401  /// \note Differently from other ROOT interfaces, the returned graph is not associated to gDirectory
1402  /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1403  /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1404  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
 // NOTE(review): listing line 1405 (the function signature) is absent from this extract.
1406  {
1407  auto graph = std::make_shared<::TGraph>();
1408  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1409  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // all-or-nothing: one empty name discards both
1410  ? ColumnNames_t()
1411  : ColumnNames_t(columnViews.begin(), columnViews.end());
1412 
1413  const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1414 
1415  // We build a default name and title based on the input columns.
 // When the caller supplied both names they are used verbatim. When userColumns was emptied above,
 // v1Name/v2Name no longer describe what is plotted (they would yield "_vs_" and blank axis titles),
 // so the labels are taken from the validated column names instead.
 const auto &xLabel = userColumns.empty() ? validatedColumns[0] : userColumns[0];
 const auto &yLabel = userColumns.empty() ? validatedColumns[1] : userColumns[1];
1416  if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
1417  const auto g_name = xLabel + "_vs_" + yLabel;
1418  const auto g_title = xLabel + " vs " + yLabel;
1419  graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1420  graph->GetXaxis()->SetTitle(xLabel.c_str());
1421  graph->GetYaxis()->SetTitle(yLabel.c_str());
1422  }
1423 
1424  return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph, graph); // unlike the histogram actions, the validated names are passed on
1425  }
1426 
1427  ////////////////////////////////////////////////////////////////////////////
1428  /// \brief Fill and return a one-dimensional profile (*lazy action*).
1429  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1430  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1431  /// \param[in] model The model to be considered to build the new return value.
1432  /// \param[in] v1Name The name of the column that will fill the x axis.
1433  /// \param[in] v2Name The name of the column that will fill the y axis.
1434  /// \return the monodimensional profile wrapped in a RResultPtr.
1435  ///
1436  /// This action is *lazy*: upon invocation of this method the calculation is
1437  /// booked but not executed. Also see RResultPtr.
1438  ///
1439  /// ### Example usage:
1440  /// ~~~{.cpp}
1441  /// // Deduce column types (this invocation needs jitting internally)
1442  /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1443  /// // Explicit column types
1444  /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1445  /// ~~~
1446  ///
1447  /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
1448  /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1449  /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1450  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
 // NOTE(review): listing line 1451 (the return type) is absent from this extract.
1452  Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1453  {
1454  std::shared_ptr<::TProfile> h(nullptr); // assigned inside the scoped block below
1455  {
1456  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes messages below kError for this scope only -- confirm
1457  h = model.GetProfile();
1458  }
1459 
1460  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { // models without axis limits are rejected up front
1461  throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1462  }
1463  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1464  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // all-or-nothing: one empty name discards both
1465  ? ColumnNames_t()
1466  : ColumnNames_t(columnViews.begin(), columnViews.end());
1467  return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h); // the same shared_ptr is passed twice
1468  }
1469 
1470  ////////////////////////////////////////////////////////////////////////////
1471  /// \brief Fill and return a one-dimensional profile (*lazy action*).
1472  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1473  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1474  /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1475  /// \param[in] model The model to be considered to build the new return value.
1476  /// \param[in] v1Name The name of the column that will fill the x axis.
1477  /// \param[in] v2Name The name of the column that will fill the y axis.
1478  /// \param[in] wName The name of the column that will provide the weights.
1479  /// \return the monodimensional profile wrapped in a RResultPtr.
1480  ///
1481  /// This action is *lazy*: upon invocation of this method the calculation is
1482  /// booked but not executed. Also see RResultPtr.
1483  ///
1484  /// ### Example usage:
1485  /// ~~~{.cpp}
1486  /// // Deduce column types (this invocation needs jitting internally)
1487  /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1488  /// // Explicit column types
1489  /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1490  /// "xValues", "yValues", "weight");
1491  /// ~~~
1492  ///
1493  /// See the first Profile1D() overload for more details.
1494  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1495  typename W = RDFDetail::RInferredType>
 // NOTE(review): listing lines 1496-1497 (the function signature) are absent from this extract.
1498  {
1499  std::shared_ptr<::TProfile> h(nullptr); // assigned inside the scoped block below
1500  {
1501  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes messages below kError for this scope only -- confirm
1502  h = model.GetProfile();
1503  }
1504 
1505  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { // models without axis limits are rejected up front
 // NOTE(review): the unweighted Profile1D overload words this message differently ("Profiles with no axes limits ...").
1506  throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1507  }
1508  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName}; // the weight column travels last
1509  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // all-or-nothing: one empty name discards all three
1510  ? ColumnNames_t()
1511  : ColumnNames_t(columnViews.begin(), columnViews.end());
1512  return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h); // same action tag as the unweighted overload
1513  }
1514 
1515  ////////////////////////////////////////////////////////////////////////////
1516  /// \brief Fill and return a one-dimensional profile (*lazy action*).
1517  /// See the first Profile1D() overload for more details.
1518  template <typename V1, typename V2, typename W>
 // NOTE(review): listing line 1519 (the function signature) is absent from this extract.
 // No defaulted template arguments here: V1, V2 and W must all be spelled out by the caller.
1520  {
1521  return Profile1D<V1, V2, W>(model, "", "", ""); // forwards to the four-argument call with every column name empty
1522  }
1523 
1524  ////////////////////////////////////////////////////////////////////////////
1525  /// \brief Fill and return a two-dimensional profile (*lazy action*).
1526  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1527  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1528  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1529  /// \param[in] model The returned profile will be constructed using this as a model.
1530  /// \param[in] v1Name The name of the column that will fill the x axis.
1531  /// \param[in] v2Name The name of the column that will fill the y axis.
1532  /// \param[in] v3Name The name of the column that will fill the z axis.
1533  /// \return the bidimensional profile wrapped in a RResultPtr.
1534  ///
1535  /// This action is *lazy*: upon invocation of this method the calculation is
1536  /// booked but not executed. Also see RResultPtr.
1537  ///
1538  /// ### Example usage:
1539  /// ~~~{.cpp}
1540  /// // Deduce column types (this invocation needs jitting internally)
1541  /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1542  /// "xValues", "yValues", "zValues");
1543  /// // Explicit column types
1544  /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1545  /// "xValues", "yValues", "zValues");
1546  /// ~~~
1547  ///
1548  /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
1549  /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1550  /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1551  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1552  typename V3 = RDFDetail::RInferredType>
 // NOTE(review): listing line 1553 (first line of the signature) is absent from this extract.
1554  std::string_view v2Name = "", std::string_view v3Name = "")
1555  {
1556  std::shared_ptr<::TProfile2D> h(nullptr); // assigned inside the scoped block below
1557  {
1558  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes messages below kError for this scope only -- confirm
1559  h = model.GetProfile();
1560  }
1561 
1562  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { // models without axis limits are rejected up front
1563  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1564  }
1565  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1566  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // all-or-nothing: one empty name discards all three
1567  ? ColumnNames_t()
1568  : ColumnNames_t(columnViews.begin(), columnViews.end());
1569  return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h); // the same shared_ptr is passed twice
1570  }
1571 
1572  ////////////////////////////////////////////////////////////////////////////
1573  /// \brief Fill and return a two-dimensional profile (*lazy action*).
1574  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1575  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1576  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1577  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1578  /// \param[in] model The returned histogram will be constructed using this as a model.
1579  /// \param[in] v1Name The name of the column that will fill the x axis.
1580  /// \param[in] v2Name The name of the column that will fill the y axis.
1581  /// \param[in] v3Name The name of the column that will fill the z axis.
1582  /// \param[in] wName The name of the column that will provide the weights.
1583  /// \return the bidimensional profile wrapped in a RResultPtr.
1584  ///
1585  /// This action is *lazy*: upon invocation of this method the calculation is
1586  /// booked but not executed. Also see RResultPtr.
1587  ///
1588  /// ### Example usage:
1589  /// ~~~{.cpp}
1590  /// // Deduce column types (this invocation needs jitting internally)
1591  /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1592  /// "xValues", "yValues", "zValues", "weight");
1593  /// // Explicit column types
1594  /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1595  /// "xValues", "yValues", "zValues", "weight");
1596  /// ~~~
1597  ///
1598  /// See the first Profile2D() overload for more details.
1599  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1600  typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
 // NOTE(review): listing line 1601 (first line of the signature) is absent from this extract.
1602  std::string_view v3Name, std::string_view wName)
1603  {
1604  std::shared_ptr<::TProfile2D> h(nullptr); // assigned inside the scoped block below
1605  {
1606  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes messages below kError for this scope only -- confirm
1607  h = model.GetProfile();
1608  }
1609 
1610  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { // models without axis limits are rejected up front
1611  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1612  }
1613  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; // the weight column travels last
1614  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // all-or-nothing: one empty name discards all four
1615  ? ColumnNames_t()
1616  : ColumnNames_t(columnViews.begin(), columnViews.end());
1617  return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h); // same action tag as the unweighted overload
1618  }
1619 
1620  /// \brief Fill and return a two-dimensional profile (*lazy action*).
1621  /// See the first Profile2D() overload for more details.
1622  template <typename V1, typename V2, typename V3, typename W>
 // NOTE(review): listing line 1623 (the function signature) is absent from this extract.
 // No defaulted template arguments here: all four column types must be spelled out by the caller.
1624  {
1625  return Profile2D<V1, V2, V3, W>(model, "", "", "", ""); // forwards to the five-argument call with every column name empty
1626  }
1627 
1628  ////////////////////////////////////////////////////////////////////////////
1629  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
1630  ///
1631  /// Type T must provide at least:
1632  /// - a copy-constructor
1633  /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
1634  /// (these types can also be passed as template parameters to this method)
1635  /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
1636  /// objects passed as argument into the object on which `Merge` was called (analogous to TH1::Merge). Note that
1637  /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
1638  /// the TCollection*).
1639  ///
1640  /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
1641  /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1642  /// \tparam T The type of the object to fill. Automatically deduced.
1643  /// \param[in] model The model to be considered to build the new return value.
1644  /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1645  /// \return the filled object wrapped in a RResultPtr.
1646  ///
1647  /// The user gives up ownership of the model object.
1648  /// The list of column names to be used for filling must always be specified.
1649  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1650  /// Also see RResultPtr.
1651  ///
1652  /// ### Example usage:
1653  /// ~~~{.cpp}
1654  /// MyClass obj;
1655  /// // Deduce column types (this invocation needs jitting internally, and in this case
1656  /// // MyClass needs to be known to the interpreter)
1657  /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
1658  /// // explicit column types
1659  /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
1660  /// ~~~
1661  ///
1662  template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
1663  RResultPtr<typename std::decay<T>::type> Fill(T &&model, const ColumnNames_t &columnList)
1664  {
 // `model` is a forwarding reference: for an lvalue argument (e.g. `Fill(obj, {...})`) T is deduced as
 // `U &`, which can neither be allocated by std::make_shared nor serve as the result type. Work on the
 // decayed type instead; for rvalue arguments of non-const type this is the same type as before.
 using Obj_t = typename std::decay<T>::type;
1665  auto h = std::make_shared<Obj_t>(std::forward<T>(model)); // copies an lvalue model, moves from an rvalue one
1666  if (!RDFInternal::HistoUtils<Obj_t>::HasAxisLimits(*h)) { // objects reported as lacking axis limits are rejected
1667  throw std::runtime_error("The absence of axes limits is not supported yet.");
1668  }
 // The number of requested columns is passed along as an extra argument after the two shared_ptr copies.
1669  return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h,
1670  columnList.size());
1671  }
1672 
1673  ////////////////////////////////////////////////////////////////////////////
1674  /// \brief Return a TStatistic object, filled once per event (*lazy action*).
1675  ///
1676  /// \tparam V The type of the value column
1677  /// \param[in] value The name of the column with the values to fill the statistics with.
1678  /// \return the filled TStatistic object wrapped in a RResultPtr.
1679  ///
1680  /// ### Example usage:
1681  /// ~~~{.cpp}
1682  /// // Deduce column type (this invocation needs jitting internally)
1683  /// auto stats0 = myDf.Stats("values");
1684  /// // Explicit column type
1685  /// auto stats1 = myDf.Stats<float>("values");
1686  /// ~~~
1687  ///
1688  template <typename V = RDFDetail::RInferredType>
 // NOTE(review): listing line 1689 (the function signature) is absent from this extract.
1690  {
1691  ColumnNames_t columns;
1692  if (!value.empty()) { // an empty name leaves the list empty, so validation receives no user column
1693  columns.emplace_back(std::string(value));
1694  }
1695  const auto validColumnNames = GetValidatedColumnNames(1, columns);
 // Ordinary `if` on a compile-time constant: both Fill calls below are instantiated for every V.
1696  if (std::is_same<V, RDFDetail::RInferredType>::value) {
1697  return Fill(TStatistic(), validColumnNames); // column type left to Fill's default template argument
1698  } else {
1699  return Fill<V>(TStatistic(), validColumnNames); // column type fixed by the caller
1700  }
1701  }
1702 
1703  ////////////////////////////////////////////////////////////////////////////
1704  /// \brief Return a TStatistic object, filled once per event (*lazy action*).
1705  ///
1706  /// \tparam V The type of the value column
1707  /// \tparam W The type of the weight column
1708  /// \param[in] value The name of the column with the values to fill the statistics with.
1709  /// \param[in] weight The name of the column with the weights to fill the statistics with.
1710  /// \return the filled TStatistic object wrapped in a RResultPtr.
1711  ///
1712  /// ### Example usage:
1713  /// ~~~{.cpp}
1714  /// // Deduce column types (this invocation needs jitting internally)
1715  /// auto stats0 = myDf.Stats("values", "weights");
1716  /// // Explicit column types
1717  /// auto stats1 = myDf.Stats<int, float>("values", "weights");
1718  /// ~~~
1719  ///
1720  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
 // NOTE(review): listing line 1721 (the function signature) is absent from this extract.
1722  {
1723  ColumnNames_t columns{std::string(value), std::string(weight)};
1724  constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
1725  constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
1726  const auto validColumnNames = GetValidatedColumnNames(2, columns);
1727  // We have 3 cases:
1728  // 1. Both types are inferred: we use Fill and let the jit kick in.
1729  // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
1730  // 3. Both types are explicit: we invoke the fully compiled Fill method.
1731  if (vIsInferred && wIsInferred) {
1732  return Fill(TStatistic(), validColumnNames);
1733  } else if (vIsInferred != wIsInferred) {
 // Exactly one type is explicit. If the value type is the inferred one, then the weight type is the
 // explicit one, and vice versa: name the explicit column first and the inferred column second.
1734  std::string error("The ");
1735  error += vIsInferred ? "weight " : "value ";
1736  error += "column type is explicit, while the ";
1737  error += vIsInferred ? "value " : "weight ";
1738  error += "is specified to be inferred. This case is not supported: please specify both types or none.";
1739  throw std::runtime_error(error);
1740  } else {
1741  return Fill<V, W>(TStatistic(), validColumnNames);
1742  }
1743  }
1744 
1745  ////////////////////////////////////////////////////////////////////////////
1746  /// \brief Return the minimum of processed column values (*lazy action*).
1747  /// \tparam T The type of the branch/column.
1748  /// \param[in] columnName The name of the branch/column to be treated.
1749  /// \return the minimum value of the selected column wrapped in a RResultPtr.
1750  ///
1751  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1752  /// template specialization of this method.
1753  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1754  ///
1755  /// This action is *lazy*: upon invocation of this method the calculation is
1756  /// booked but not executed. Also see RResultPtr.
1757  ///
1758  /// ### Example usage:
1759  /// ~~~{.cpp}
1760  /// // Deduce column type (this invocation needs jitting internally)
1761  /// auto minVal0 = myDf.Min("values");
1762  /// // Explicit column type
1763  /// auto minVal1 = myDf.Min<double>("values");
1764  /// ~~~
1765  ///
1766  template <typename T = RDFDetail::RInferredType>
 // NOTE(review): listing line 1767 (the function signature) is absent from this extract.
1768  {
1769  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty column list
1770  using RetType_t = RDFDetail::MinReturnType_t<T>;
1771  auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max()); // start from the largest representable RetType_t
1772  return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV); // the same shared_ptr is passed twice
1773  }
1774 
1775  ////////////////////////////////////////////////////////////////////////////
1776  /// \brief Return the maximum of processed column values (*lazy action*).
1777  /// \tparam T The type of the branch/column.
1778  /// \param[in] columnName The name of the branch/column to be treated.
1779  /// \return the maximum value of the selected column wrapped in a RResultPtr.
1780  ///
1781  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1782  /// template specialization of this method.
1783  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1784  ///
1785  /// This action is *lazy*: upon invocation of this method the calculation is
1786  /// booked but not executed. Also see RResultPtr.
1787  ///
1788  /// ### Example usage:
1789  /// ~~~{.cpp}
1790  /// // Deduce column type (this invocation needs jitting internally)
1791  /// auto maxVal0 = myDf.Max("values");
1792  /// // Explicit column type
1793  /// auto maxVal1 = myDf.Max<double>("values");
1794  /// ~~~
1795  ///
1796  template <typename T = RDFDetail::RInferredType>
 // NOTE(review): listing line 1797 (the function signature) is absent from this extract.
1798  {
1799  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty column list
1800  using RetType_t = RDFDetail::MaxReturnType_t<T>;
1801  auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest()); // lowest(), not min(): min() is the smallest positive value for floating-point types
1802  return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV); // the same shared_ptr is passed twice
1803  }
1804 
1805  ////////////////////////////////////////////////////////////////////////////
1806  /// \brief Return the mean of processed column values (*lazy action*).
1807  /// \tparam T The type of the branch/column.
1808  /// \param[in] columnName The name of the branch/column to be treated.
1809  /// \return the mean value of the selected column wrapped in a RResultPtr.
1810  ///
1811  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1812  /// template specialization of this method.
1813  ///
1814  /// This action is *lazy*: upon invocation of this method the calculation is
1815  /// booked but not executed. Also see RResultPtr.
1816  ///
1817  /// ### Example usage:
1818  /// ~~~{.cpp}
1819  /// // Deduce column type (this invocation needs jitting internally)
1820  /// auto meanVal0 = myDf.Mean("values");
1821  /// // Explicit column type
1822  /// auto meanVal1 = myDf.Mean<double>("values");
1823  /// ~~~
1824  ///
1825  template <typename T = RDFDetail::RInferredType>
 // NOTE(review): listing line 1826 (the function signature) is absent from this extract.
1827  {
1828  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty column list
1829  auto meanV = std::make_shared<double>(0); // the result is stored as double whatever T is
1830  return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV); // the same shared_ptr is passed twice
1831  }
1832 
1833  ////////////////////////////////////////////////////////////////////////////
1834  /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
1835  /// \tparam T The type of the branch/column.
1836  /// \param[in] columnName The name of the branch/column to be treated.
1837  /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
1838  ///
1839  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1840  /// template specialization of this method.
1841  ///
1842  /// This action is *lazy*: upon invocation of this method the calculation is
1843  /// booked but not executed. Also see RResultPtr.
1844  ///
1845  /// ### Example usage:
1846  /// ~~~{.cpp}
1847  /// // Deduce column type (this invocation needs jitting internally)
1848  /// auto stdDev0 = myDf.StdDev("values");
1849  /// // Explicit column type
1850  /// auto stdDev1 = myDf.StdDev<double>("values");
1851  /// ~~~
1852  ///
1853  template <typename T = RDFDetail::RInferredType>
1855  {
1856  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1857  auto stdDeviationV = std::make_shared<double>(0);
1858  return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV);
1859  }
1860 
1861  // clang-format off
1862  ////////////////////////////////////////////////////////////////////////////
1863  /// \brief Return the sum of processed column values (*lazy action*).
1864  /// \tparam T The type of the branch/column.
1865  /// \param[in] columnName The name of the branch/column.
1866  /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1867  /// \return the sum of the selected column wrapped in a RResultPtr.
1868  ///
1869  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1870  /// template specialization of this method.
1871  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1872  ///
1873  /// This action is *lazy*: upon invocation of this method the calculation is
1874  /// booked but not executed. Also see RResultPtr.
1875  ///
1876  /// ### Example usage:
1877  /// ~~~{.cpp}
1878  /// // Deduce column type (this invocation needs jitting internally)
1879  /// auto sum0 = myDf.Sum("values");
1880  /// // Explicit column type
1881  /// auto sum1 = myDf.Sum<double>("values");
1882  /// ~~~
1883  ///
1884  template <typename T = RDFDetail::RInferredType>
1886  Sum(std::string_view columnName = "",
1887  const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1888  {
1889  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1890  auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1891  return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV);
1892  }
1893  // clang-format on
1894 
1895  ////////////////////////////////////////////////////////////////////////////
1896  /// \brief Gather filtering statistics.
1897  /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
1898  ///
1899  /// Calling `Report` on the main `RDataFrame` object gathers stats for
1900  /// all named filters in the call graph. Calling this method on a
1901  /// stored chain state (i.e. a graph node different from the first) gathers
1902  /// the stats for all named filters in the chain section between the original
1903  /// `RDataFrame` and that node (included). Stats are gathered in the same
1904  /// order as the named filters have been added to the graph.
1905  /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1906  /// effects cuts had.
1907  ///
1908  /// This action is *lazy*: upon invocation of
1909  /// this method the calculation is booked but not executed. See RResultPtr
1910  /// documentation.
1911  ///
1912  /// ### Example usage:
1913  /// ~~~{.cpp}
1914  /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
1915  /// auto cutReport = filtered.Report();
1916  /// cutReport->Print();
1917  /// ~~~
1918  ///
1920  {
1921  bool returnEmptyReport = false;
1922  // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1923  // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1924  // certainly does not contain named filters.
1925  // The number 4 takes into account the implicit columns for entry and slot number
1926  // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
1927  if (std::is_same<Proxied, RLoopManager>::value && fDefines.GetNames().size() > 4)
1928  returnEmptyReport = true;
1929 
1930  auto rep = std::make_shared<RCutFlowReport>();
1931  using Helper_t = RDFInternal::ReportHelper<Proxied>;
1932  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1933 
1934  auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
1936 
1937  fLoopManager->Book(action.get());
1938  return MakeResultPtr(rep, *fLoopManager, std::move(action));
1939  }
1940 
1941  /////////////////////////////////////////////////////////////////////////////
1942  /// \brief Returns the names of the available columns.
1943  /// \return the container of column names.
1944  ///
1945  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1946  ///
1947  /// ### Example usage:
1948  /// ~~~{.cpp}
1949  /// auto colNames = d.GetColumnNames();
1950  /// // Print columns' names
1951  /// for (auto &&colName : colNames) std::cout << colName << std::endl;
1952  /// ~~~
1953  ///
1955  {
1956  ColumnNames_t allColumns;
1957 
1958  auto addIfNotInternal = [&allColumns](std::string_view colName) {
1959  if (!RDFInternal::IsInternalColumn(colName))
1960  allColumns.emplace_back(colName);
1961  };
1962 
1963  auto columnNames = fDefines.GetNames();
1964 
1965  std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
1966 
1967  auto tree = fLoopManager->GetTree();
1968  if (tree) {
1969  auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
1970  allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1971  }
1972 
1973  if (fDataSource) {
1974  const auto &dsColNames = fDataSource->GetColumnNames();
1975  // ignore columns starting with __rdf_sizeof_
1976  std::copy_if(dsColNames.begin(), dsColNames.end(), std::back_inserter(allColumns),
1977  [](const std::string &s) { return s.rfind("__rdf_sizeof", 0) != 0; });
1978  }
1979 
1980  return allColumns;
1981  }
1982 
1983  /////////////////////////////////////////////////////////////////////////////
1984  /// \brief Return the type of a given column as a string.
1985  /// \return the type of the required column.
1986  ///
1987  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1988  ///
1989  /// ### Example usage:
1990  /// ~~~{.cpp}
1991  /// auto colType = d.GetColumnType("columnName");
1992  /// // Print column type
1993  /// std::cout << "Column columnName has type " << colType << std::endl;
1994  /// ~~~
1995  ///
1996  std::string GetColumnType(std::string_view column)
1997  {
1998  const auto col = RDFInternal::ResolveAlias(std::string(column), fLoopManager->GetAliasMap());
1999 
2000  RDFDetail::RDefineBase *define = fDefines.HasName(col) ? fDefines.GetColumns().at(col).get() : nullptr;
2001 
2002  const bool convertVector2RVec = true;
2004  convertVector2RVec);
2005  }
2006 
2007  /////////////////////////////////////////////////////////////////////////////
2008  /// \brief Return information about the dataframe.
2009  /// \return information about the dataframe as string
2010  ///
2011  /// This convenience function describes the dataframe and combines the following information:
2012  /// - Information about the dataset, see DescribeDataset()
2013  /// - Number of event loops run, see GetNRuns()
2014  /// - Number of total and defined columns, see GetColumnNames() and GetDefinedColumnNames()
2015  /// - Column names, see GetColumnNames()
2016  /// - Column types, see GetColumnType()
2017  /// - Number of processing slots, see GetNSlots()
2018  ///
2019  /// This is not an action nor a transformation, just a query to the RDataFrame object.
2020  /// The result is dependent on the node from which this method is called, e.g. the list of
2021  /// defined columns returned by GetDefinedColumnNames().
2022  ///
2023  /// Please note that this is a convenience feature and the layout of the output can be subject
2024  /// to change and should not be automatically parsed.
2025  ///
2026  /// ### Example usage:
2027  /// ~~~{.cpp}
2028  /// RDataFrame df(10);
2029  /// auto df2 = df.Define("x", "1.f").Define("s", "\"myStr\"");
2030  /// // Describe the dataframe
2031  /// std::cout << df2.Describe() << std::endl;
2032  /// ~~~
2033  ///
2034  std::string Describe()
2035  {
2036  // Put the information from DescribeDataset on the top
2037  std::stringstream ss;
2038  ss << DescribeDataset() << "\n\n";
2039 
2040  // Build set of defined column names to find later in all column names
2041  // the defined columns more efficiently
2042  const auto columnNames = GetColumnNames();
2043  std::set<std::string> definedColumnNamesSet;
2044  for (const auto &name : GetDefinedColumnNames())
2045  definedColumnNamesSet.insert(name);
2046 
2047  // Get information for the metadata table
2048  const std::vector<std::string> metadataProperties = {"Columns in total", "Columns from defines",
2049  "Event loops run", "Processing slots"};
2050  const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
2051  std::to_string(definedColumnNamesSet.size()),
2052  std::to_string(GetNRuns()), std::to_string(GetNSlots())};
2053 
2054  // Set header for metadata table
2055  const auto columnWidthProperties = RDFInternal::GetColumnWidth(metadataProperties);
2056  // The column width of the values is required to make right-bound numbers and is equal
2057  // to the maximum of the string "Value" and all values to be put in this column.
2058  const auto columnWidthValues =
2059  std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), static_cast<std::size_t>(5u));
2060  ss << std::left << std::setw(columnWidthProperties) << "Property" << std::setw(columnWidthValues) << "Value\n"
2061  << std::setw(columnWidthProperties) << "--------" << std::setw(columnWidthValues) << "-----\n";
2062 
2063  // Build metadata table
2064  // All numbers should be bound to the right and strings bound to the left.
2065  for (auto i = 0u; i < metadataProperties.size(); i++) {
2066  ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
2067  << std::setw(columnWidthValues) << metadataValues[i] << '\n';
2068  }
2069  ss << '\n'; // put space between this and the next table
2070 
2071  // Set header for columns table
2072  const auto columnWidthNames = RDFInternal::GetColumnWidth(columnNames);
2073  const auto columnTypes = GetColumnTypeNamesList(columnNames);
2074  const auto columnWidthTypes = RDFInternal::GetColumnWidth(columnTypes);
2075  ss << std::left << std::setw(columnWidthNames) << "Column" << std::setw(columnWidthTypes) << "Type"
2076  << "Origin\n"
2077  << std::setw(columnWidthNames) << "------" << std::setw(columnWidthTypes) << "----"
2078  << "------\n";
2079 
2080  // Build columns table
2081  const auto nCols = columnNames.size();
2082  for (auto i = 0u; i < nCols; i++) {
2083  auto origin = "Dataset";
2084  if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
2085  origin = "Define";
2086  ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes)
2087  << columnTypes[i] << origin;
2088  if (i < nCols - 1)
2089  ss << '\n';
2090  }
2091 
2092  return ss.str();
2093  }
2094 
2095  /// \brief Returns the names of the filters created.
2096  /// \return the container of filters names.
2097  ///
2098  /// If called on a root node, all the filters in the computation graph will
2099  /// be printed. For any other node, only the filters upstream of that node.
2100  /// Filters without a name are printed as "Unnamed Filter"
2101  /// This is not an action nor a transformation, just a query to the RDataFrame object.
2102  ///
2103  /// ### Example usage:
2104  /// ~~~{.cpp}
2105  /// auto filtNames = d.GetFilterNames();
2106  /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2107  /// ~~~
2108  ///
2109  std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2110 
2111  /// \brief Returns the names of the defined columns.
2112  /// \return the container of the defined column names.
2113  ///
2114  /// This is not an action nor a transformation, just a simple utility to
2115  /// get the columns names that have been defined up to the node.
2116  /// If no custom column has been defined, e.g. on a root node, it returns an
2117  /// empty collection.
2118  ///
2119  /// ### Example usage:
2120  /// ~~~{.cpp}
2121  /// auto defColNames = d.GetDefinedColumnNames();
2122  /// // Print defined columns' names
2123  /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
2124  /// ~~~
2125  ///
2127  {
2128  ColumnNames_t definedColumns;
2129 
2130  auto columns = fDefines.GetColumns();
2131 
2132  for (auto column : columns) {
2133  if (!RDFInternal::IsInternalColumn(column.first))
2134  definedColumns.emplace_back(column.first);
2135  }
2136 
2137  return definedColumns;
2138  }
2139 
2140  /// \brief Checks if a column is present in the dataset.
2141  /// \return true if the column is available, false otherwise
2142  ///
2143  /// This method checks if a column is part of the input ROOT dataset, has
2144  /// been defined or can be provided by the data source.
2145  ///
2146  /// Example usage:
2147  /// ~~~{.cpp}
2148  /// ROOT::RDataFrame base(1);
2149  /// auto rdf = base.Define("definedColumn", [](){return 0;});
2150  /// rdf.HasColumn("definedColumn"); // true: we defined it
2151  /// rdf.HasColumn("rdfentry_"); // true: it's always there
2152  /// rdf.HasColumn("foo"); // false: it is not there
2153  /// ~~~
2154  bool HasColumn(std::string_view columnName)
2155  {
2156  if (fDefines.HasName(columnName))
2157  return true;
2158 
2159  if (auto tree = fLoopManager->GetTree()) {
2160  const auto &branchNames = fLoopManager->GetBranchNames();
2161  const auto branchNamesEnd = branchNames.end();
2162  if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
2163  return true;
2164  }
2165 
2166  if (fDataSource && fDataSource->HasColumn(columnName))
2167  return true;
2168 
2169  return false;
2170  }
2171 
2172  /// \brief Gets the number of data processing slots.
2173  /// \return The number of data processing slots used by this RDataFrame instance
2174  ///
2175  /// This method returns the number of data processing slots used by this RDataFrame
2176  /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
2177  ///
2178  /// Example usage:
2179  /// ~~~{.cpp}
2180  /// ROOT::EnableImplicitMT(6);
2181  /// ROOT::RDataFrame df(1);
2182  /// std::cout << df.GetNSlots() << std::endl; // prints "6"
2183  /// ~~~
2184  unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
2185 
2186  /// \brief Gets the number of event loops run.
2187  /// \return The number of event loops run by this RDataFrame instance
2188  ///
2189  /// This method returns the number of event loops run so far by this RDataFrame instance.
2190  ///
2191  /// Example usage:
2192  /// ~~~{.cpp}
2193  /// ROOT::RDataFrame df(1);
2194  /// std::cout << df.GetNRuns() << std::endl; // prints "0"
2195  /// df.Sum("rdfentry_").GetValue(); // trigger the event loop
2196  /// std::cout << df.GetNRuns() << std::endl; // prints "1"
2197  /// df.Sum("rdfentry_").GetValue(); // trigger another event loop
2198  /// std::cout << df.GetNRuns() << std::endl; // prints "2"
2199  /// ~~~
2200  unsigned int GetNRuns() const { return fLoopManager->GetNRuns(); }
2201 
2202  /// \brief Get descriptive information about the dataset.
2203  /// \return Info describing the dataset as a multi-line string
2204  ///
2205  /// The information returned by this convenience function is meant for interactive
2206  /// use. The exact string format should not be parsed automatically and can be subject to change.
2207  ///
2208  /// Example usage:
2209  /// ~~~{.cpp}
2210  /// ROOT::RDataFrame df("Events", "sample.root");
2211  /// std::cout << df.DescribeDataset() << std::endl;
2212  /// // prints "Dataframe from TTree Events in file sample.root"
2213  /// ~~~
2214  std::string DescribeDataset() const
2215  {
2216  // TTree/TChain as input
2217  const auto tree = fLoopManager->GetTree();
2218  if (tree) {
2219  const auto treeName = tree->GetName();
2220  const auto isTChain = dynamic_cast<TChain *>(tree) ? true : false;
2221  const auto treeType = isTChain ? "TChain" : "TTree";
2222  const auto isInMemory = !isTChain && !tree->GetCurrentFile() ? true : false;
2223  const auto friendInfo = ROOT::Internal::TreeUtils::GetFriendInfo(*tree);
2224  const auto hasFriends = friendInfo.fFriendNames.empty() ? false : true;
2225  std::stringstream ss;
2226  ss << "Dataframe from " << treeType << " " << treeName;
2227  if (isInMemory) {
2228  ss << " (in-memory)";
2229  } else {
2231  const auto numFiles = files.size();
2232  if (numFiles == 1) {
2233  ss << " in file " << files[0];
2234  } else {
2235  ss << " in files\n";
2236  for (auto i = 0u; i < numFiles; i++) {
2237  ss << " " << files[i];
2238  if (i < numFiles - 1)
2239  ss << '\n';
2240  }
2241  }
2242  }
2243  if (hasFriends) {
2244  const auto numFriends = friendInfo.fFriendNames.size();
2245  if (numFriends == 1) {
2246  ss << "\nwith friend\n";
2247  } else {
2248  ss << "\nwith friends\n";
2249  }
2250  for (auto i = 0u; i < numFriends; i++) {
2251  const auto nameAlias = friendInfo.fFriendNames[i];
2252  const auto files = friendInfo.fFriendFileNames[i];
2253  const auto numFiles = files.size();
2254  const auto subnames = friendInfo.fFriendChainSubNames[i];
2255  ss << " " << nameAlias.first;
2256  if (nameAlias.first != nameAlias.second)
2257  ss << " (" << nameAlias.second << ")";
2258  // case: TTree as friend
2259  if (numFiles == 1) {
2260  ss << " " << files[0];
2261  }
2262  // case: TChain as friend
2263  else {
2264  ss << '\n';
2265  for (auto j = 0u; j < numFiles; j++) {
2266  ss << " " << subnames[j] << " " << files[j];
2267  if (j < numFiles - 1)
2268  ss << '\n';
2269  }
2270  }
2271  if (i < numFriends - 1)
2272  ss << '\n';
2273  }
2274  }
2275  return ss.str();
2276  }
2277  // Datasource as input
2278  else if (fDataSource) {
2279  const auto datasourceLabel = fDataSource->GetLabel();
2280  return "Dataframe from datasource " + datasourceLabel;
2281  }
2282  // Trivial/empty datasource
2283  else {
2284  const auto n = fLoopManager->GetNEmptyEntries();
2285  if (n == 1) {
2286  return "Empty dataframe filling 1 row";
2287  } else {
2288  return "Empty dataframe filling " + std::to_string(n) + " rows";
2289  }
2290  }
2291  }
2292 
2293  // clang-format off
2294  ////////////////////////////////////////////////////////////////////////////
2295  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2296  /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2297  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2298  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2299  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2300  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2301  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2302  /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
2303  /// \return the result of the aggregation wrapped in a RResultPtr.
2304  ///
2305  /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2306  /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2307  /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2308  /// the value of the column columnName.
2309  /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2310  /// Otherwise the signature of aggregator must be `void(U&,T)`.
2311  ///
2312  /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2313  /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2314  /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2315  ///
2316  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2317  ///
2318  /// Example usage:
2319  /// ~~~{.cpp}
2320  /// auto aggregator = [](double acc, double x) { return acc * x; };
2321  /// ROOT::EnableImplicitMT();
2322  /// // If multithread is enabled, the aggregator function will be called by more threads
2323  /// // and will produce a vector of partial accumulators.
2324  /// // The merger function performs the final aggregation of these partial results.
2325  /// auto merger = [](std::vector<double> &accumulators) {
2326  /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2327  /// accumulators[0] *= accumulators[i];
2328  /// }
2329  /// };
2330  ///
2331  /// // The accumulator is initialized at this value by every thread.
2332  /// double initValue = 1.;
2333  ///
2334  /// // Multiplies all elements of the column "x"
2335  /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2336  /// ~~~
2337  // clang-format on
2338  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2339  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2340  typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2341  typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2342  typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2343  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2344  {
2345  RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2346  const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2347 
2348  const auto validColumnNames = GetValidatedColumnNames(1, columns);
2349  CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2350 
2351  auto accObjPtr = std::make_shared<U>(aggIdentity);
2352  using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2353  using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
2354  auto action = std::make_unique<Action_t>(
2355  Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2357  fLoopManager->Book(action.get());
2358  return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2359  }
2360 
2361  // clang-format off
2362  ////////////////////////////////////////////////////////////////////////////
2363  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2364  /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2365  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2366  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2367  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2368  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2369  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2370  /// \return the result of the aggregation wrapped in a RResultPtr.
2371  ///
2372  /// See previous Aggregate overload for more information.
2373  // clang-format on
2374  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2375  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2376  typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2377  typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2378  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2379  {
2380  static_assert(
2381  std::is_default_constructible<U>::value,
2382  "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2383  return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2384  }
2385 
2386  // clang-format off
2387  ////////////////////////////////////////////////////////////////////////////
2388  /// \brief Book execution of a custom action using a user-defined helper object.
2389  /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2390  /// \tparam OtherColumns A list of the types of the other columns used by this action
2391  /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2392  /// \param[in] helper The Action Helper to be scheduled.
2393  /// \param[in] columns The names of the columns on which the helper acts.
2394  /// \return the result of the helper wrapped in a RResultPtr.
2395  ///
2396  /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2397  /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2398  /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2399  ///
2400  /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2401  /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2402  /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2403  /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2404  /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2405  /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2406  /// the requested columns for the particular entry being processed.
2407  /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2408  /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2409  /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2410  /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
2411  /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2412  /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2413  /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2414  /// method concurrently, but will always pass different 'slot' numbers.
2415  /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2416  /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2417  /// before Initialize(), because the RResultPtr is constructed before the event loop is started.
2418  ///
2419  /// In case this is called without specifying column types, jitting is used,
2420  /// and the Helper class needs to be known to the interpreter.
2421  ///
2422  /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2423  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2424  // clang-format on
2425 
2426  template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
2428  {
2429  // TODO add more static sanity checks on Helper
2430  using AH = RDFDetail::RActionImpl<Helper>;
2431  static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
2432  "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2433 
2434  auto hPtr = std::make_shared<Helper>(std::forward<Helper>(helper));
2435  auto resPtr = hPtr->GetResultPtr();
2436 
2437  if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
2438  return CallCreateActionWithoutColsIfPossible<Helper>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2439  } else {
2440  return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2441  columns.size());
2442  }
2443  }
2444 
2445  ////////////////////////////////////////////////////////////////////////////
2446  /// \brief Provides a representation of the columns in the dataset.
2447  /// \tparam ColumnTypes variadic list of branch/column types.
2448  /// \param[in] columnList Names of the columns to be displayed.
2449  /// \param[in] nRows Number of events for each column to be displayed.
2450  /// \return the `RDisplay` instance wrapped in a RResultPtr.
2451  ///
2452  /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2453  /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2454  /// complete version through `AsString()`.
2455  ///
2456  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2457  ///
2458  /// Example usage:
2459  /// ~~~{.cpp}
2460  /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2461  /// auto d1 = rdf.Display("");
2462  /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2463  /// auto d2 = rdf.Display({"x", "y"}, 128);
2464  /// // Printing the short representations, the event loop will run
2465  /// d1->Print();
2466  /// d2->Print();
2467  /// ~~~
2468  template <typename... ColumnTypes>
2469  RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2470  {
2471  CheckIMTDisabled("Display");
2472 
2473  auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2474  return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer, displayer);
2475  }
2476 
2477  ////////////////////////////////////////////////////////////////////////////
2478  /// \brief Provides a representation of the columns in the dataset.
2479  /// \param[in] columnList Names of the columns to be displayed.
2480  /// \param[in] nRows Number of events for each column to be displayed.
2481  /// \return the `RDisplay` instance wrapped in a RResultPtr.
2482  ///
2483  /// This overload automatically infers the column types.
2484  /// See the previous overloads for further details.
2485  RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2486  {
2487  CheckIMTDisabled("Display");
2488  auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2489  return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer, displayer,
2490  columnList.size());
2491  }
2492 
2493  ////////////////////////////////////////////////////////////////////////////
2494  /// \brief Provides a representation of the columns in the dataset.
2495  /// \param[in] columnNameRegexp A regular expression to select the columns.
2496  /// \param[in] nRows Number of events for each column to be displayed.
2497  /// \return the `RDisplay` instance wrapped in a RResultPtr.
2498  ///
2499  /// The existing columns are matched against the regular expression. If the string provided
2500  /// is empty, all columns are selected.
2501  /// See the previous overloads for further details.
2502  RResultPtr<RDisplay> Display(std::string_view columnNameRegexp = "", const int &nRows = 5)
2503  {
2504  const auto columnNames = GetColumnNames();
2505  const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
2506  return Display(selectedColumns, nRows);
2507  }
2508 
2509  ////////////////////////////////////////////////////////////////////////////
2510  /// \brief Provides a representation of the columns in the dataset.
2511  /// \param[in] columnList Names of the columns to be displayed.
2512  /// \param[in] nRows Number of events for each column to be displayed.
2513  /// \return the `RDisplay` instance wrapped in a RResultPtr.
2514  ///
2515  /// See the previous overloads for further details.
2516  RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, const int &nRows = 5)
2517  {
2518  ColumnNames_t selectedColumns(columnList);
2519  return Display(selectedColumns, nRows);
2520  }
2521 
2522 private:
2524  {
2526 
2527  // Entry number column
2528  const std::string entryColName = "rdfentry_";
2529  const std::string entryColType = "ULong64_t";
2530  auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
2531  using NewColEntry_t = RDFDetail::RDefine<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
2532 
2533  auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
2534  ColumnNames_t{}, fLoopManager->GetNSlots(), newCols,
2536  newCols.AddColumn(entryColumn, entryColName);
2537 
2538  // Slot number column
2539  const std::string slotColName = "rdfslot_";
2540  const std::string slotColType = "unsigned int";
2541  auto slotColGen = [](unsigned int slot) { return slot; };
2542  using NewColSlot_t = RDFDetail::RDefine<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
2543 
2544  auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
2545  fLoopManager->GetNSlots(), newCols,
2547  newCols.AddColumn(slotColumn, slotColName);
2548 
2549  fDefines = std::move(newCols);
2550 
2551  fLoopManager->AddColumnAlias("tdfentry_", entryColName);
2552  fDefines.AddName("tdfentry_");
2553  fLoopManager->AddColumnAlias("tdfslot_", slotColName);
2554  fDefines.AddName("tdfslot_");
2555  }
2556 
2557  std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
2558  {
2559  std::vector<std::string> types;
2560 
2561  for (auto column : columnList) {
2562  types.push_back(GetColumnType(column));
2563  }
2564  return types;
2565  }
2566 
2568  {
2569  if (ROOT::IsImplicitMTEnabled()) {
2570  std::string error(callerName);
2571  error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
2572  throw std::runtime_error(error);
2573  }
2574  }
2575 
2576  /// Create RAction object, return RResultPtr for the action
2577  /// Overload for the case in which all column types were specified (no jitting).
2578  /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
2579  /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
2580  /// for which the constructor arguments of the action helper are different from the returned value.
2581  template <typename ActionTag, typename... ColTypes, typename ActionResultType,
2582  typename HelperArgType = ActionResultType,
2583  typename std::enable_if<!RDFInternal::RNeedJitting<ColTypes...>::value, int>::type = 0>
2584  RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
2585  const std::shared_ptr<HelperArgType> &helperArg, const int /*nColumns*/ = -1)
2586  {
2587  constexpr auto nColumns = sizeof...(ColTypes);
2588 
2589  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2591 
2592  const auto nSlots = fLoopManager->GetNSlots();
2593 
2594  auto action =
2595  RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, fProxiedPtr, ActionTag{}, fDefines);
2596  fLoopManager->Book(action.get());
2597  fLoopManager->AddDataBlockCallback(action->GetDataBlockCallback());
2598  return MakeResultPtr(r, *fLoopManager, std::move(action));
2599  }
2600 
2601  /// Create RAction object, return RResultPtr for the action
2602  /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
2603  /// This overload has a `nColumns` optional argument. If present, the number of required columns for
2604  /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
2605  template <typename ActionTag, typename... ColTypes, typename ActionResultType,
2606  typename HelperArgType = ActionResultType,
2607  typename std::enable_if<RDFInternal::RNeedJitting<ColTypes...>::value, int>::type = 0>
2608  RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
2609  const std::shared_ptr<HelperArgType> &helperArg, const int nColumns = -1)
2610  {
2611  auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
2612 
2613  const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
2614  const unsigned int nSlots = fLoopManager->GetNSlots();
2615 
2616  auto *tree = fLoopManager->GetTree();
2617  auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
2618 
2619  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2620  using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
2621  RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fDefines, fDataSource);
2622 
2623  const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager);
2624  auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
2625 
2626  auto toJit = RDFInternal::JitBuildAction(
2627  validColumnNames, upcastNodeOnHeap, typeid(std::shared_ptr<HelperArgType>), typeid(ActionTag), helperArgOnHeap,
2628  tree, nSlots, fDefines, fDataSource, jittedActionOnHeap);
2629  fLoopManager->Book(jittedAction.get());
2630  fLoopManager->ToJitExec(toJit);
2631  return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
2632  }
2633 
2634  template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2635  typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
2636  DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
2637  {
2639  if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
2643  } else {
2647  }
2648 
2649  using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2650  using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2651  std::is_same<DefineType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
2652  using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2653  std::is_same<DefineType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
2654 
2655  constexpr auto nColumns = ColTypes_t::list_size;
2656 
2657  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2658  CheckAndFillDSColumns(validColumnNames, ColTypes_t());
2659 
2660  // Declare return type to the interpreter, for future use by jitted actions
2661  auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2662  if (retTypeName.empty()) {
2663  // The type is not known to the interpreter.
2664  // We must not error out here, but if/when this column is used in jitted code
2665  const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2666  retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2667  }
2668 
2669  using NewCol_t = RDFDetail::RDefine<F, DefineType>;
2670  auto newColumn =
2671  std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
2673 
2675  newCols.AddColumn(newColumn, name);
2676 
2677  RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, newCols, fDataSource);
2678 
2679  return newInterface;
2680  }
2681 
2682  // This overload is chosen when the callable passed to Define or DefineSlot returns a type that is not
2683  // default-constructible (e.g. void). It simply fires a compile-time error. This is preferable to a static_assert
2684  // in the main `Define` overload because this way compilation of `Define` has no way to continue after the error.
2685  template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2686  bool IsFStringConv = std::is_convertible<F, std::string>::value,
2687  bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2688  typename std::enable_if<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>::type
2690  {
2691  static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2692  "Error in `Define`: type returned by expression is not default-constructible");
2693  return *this; // never reached
2694  }
2695 
2696  template <typename... ColumnTypes>
2698  const ColumnNames_t &columnList, const RSnapshotOptions &options)
2699  {
2700  const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2701 
2702  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
2703  const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
2706 
2707  const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
2708  const auto &treename = parsedTreePath.fTreeName;
2709  const auto &dirname = parsedTreePath.fDirName;
2710 
2711  auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
2712  std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
2713 
2715  auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
2716 
2717  auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs);
2718 
2719  if (!options.fLazy)
2720  *resPtr;
2721  return resPtr;
2722  }
2723 
2724  ////////////////////////////////////////////////////////////////////////////
2725  /// \brief Implementation of cache.
2726  template <typename... ColTypes, std::size_t... S>
2727  RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
2728  {
2729  const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2730 
2731  // Check at compile time that the columns types are copy constructible
2732  constexpr bool areCopyConstructible =
2733  RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
2734  static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2735 
2736  RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
2737 
2738  auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
2739  auto ds = std::make_unique<RLazyDS<ColTypes...>>(
2740  std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
2741 
2742  RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
2743 
2744  return cachedRDF;
2745  }
2746 
2747  template <typename Helper, typename ActionResultType>
2748  auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
2749  const std::shared_ptr<Helper> &hPtr,
2751  -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
2752  {
2753  return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, 0u);
2754  }
2755 
2756  template <typename Helper, typename ActionResultType, typename... Others>
2758  CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
2759  const std::shared_ptr<Helper>& /*hPtr*/,
2760  Others...)
2761  {
2762  throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
2763  "columns! The action helper type was ") +
2764  typeid(Helper).name());
2765  return {};
2766  }
2767 
2768 protected:
2769  RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, const RDFInternal::RBookedDefines &columns,
2770  RDataSource *ds)
2771  : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fDefines(columns)
2772  {
2773  }
2774 
2776 
2777  const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2778 
2779  ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2780  {
2782  }
2783 
2784  template <typename... ColumnTypes>
2786  {
2787  if (fDataSource != nullptr)
2788  RDFInternal::AddDSColumns(validCols, *fLoopManager, *fDataSource, typeList, fDefines);
2789  }
2790 };
2791 
2792 } // namespace RDF
2793 
2794 } // namespace ROOT
2795 
2796 #endif // ROOT_RDF_TINTERFACE
ROOT::RDF::RInterface::DescribeDataset
std::string DescribeDataset() const
Get descriptive information about the dataset.
Definition: RInterface.hxx:2214
ROOT::Detail::RDF::RFilterBase
Definition: RFilterBase.hxx:36
ROOT::Internal::RDF::CheckForRedefinition
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
Definition: RDFInterfaceUtils.cxx:475
ROOT::Detail::RDF::RLoopManager::Run
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
Definition: RLoopManager.cxx:687
n
const Int_t n
Definition: legend1.C:16
ROOT::Internal::RDF::FilterArraySizeColNames
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
Definition: RDFInterfaceUtils.cxx:347
GraphCreatorHelper
Helper class that provides the operation graph nodes.
ROOT::RDF::RInterface::Stats
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
Definition: RInterface.hxx:1689
ROOT::TypeTraits
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
ROOT::RDF::RInterface::DefineImpl
std::enable_if<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view, F, const ColumnNames_t &)
Definition: RInterface.hxx:2689
ROOT::RDF::RInterface::GetNSlots
unsigned int GetNSlots() const
Gets the number of data processing slots.
Definition: RInterface.hxx:2184
ROOT::RDF::RInterface::Define
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
Definition: RInterface.hxx:297
ROOT::RDF::RInterface::StdDev
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
Definition: RInterface.hxx:1854
ROOT::RDF::RDataSource::GetColumnNames
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
ROOT::Internal::RDF::ConvertRegexToColumns
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
Definition: RDFInterfaceUtils.cxx:439
f
#define f(i)
Definition: RSha256.hxx:104
TDirectory.h
ROOT::RDF::RInterface::Redefine
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Creates a custom column, overriding an existing one with the same name.
Definition: RInterface.hxx:460
ROOT::RDF::RInterface::Aggregate
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
Definition: RInterface.hxx:2343
ROOT::Internal::RDF::CheckValidCppVarName
void CheckValidCppVarName(std::string_view var, const std::string &where)
Definition: RDFInterfaceUtils.cxx:386
InternalTreeUtils.hxx
ROOT::RDF::TH3DModel::GetHistogram
std::shared_ptr<::TH3D > GetHistogram() const
Definition: RDFHistoModels.cxx:200
ROOT::Internal::RDF::ParseTreePath
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
Definition: RDFInterfaceUtils.cxx:606
ROOT::Internal::RDF::CheckTypesAndPars
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
Definition: RDFInterfaceUtils.cxx:542
RSnapshotOptions.hxx
ROOT::RDF::RInterface::CreateAction
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int nColumns=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which one or more co...
Definition: RInterface.hxx:2608
ROOT::Internal::TreeUtils::GetFriendInfo
RFriendInfo GetFriendInfo(const TTree &tree)
Get and store the names, aliases and file names of the direct friends of the tree.
Definition: InternalTreeUtils.cxx:97
ROOT::RDF::RInterface::Cache
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:723
ROOT::RDF::RInterface::Profile1D
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
Definition: RInterface.hxx:1497
ROOT::RDF::RInterface::Define
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Creates a custom column.
Definition: RInterface.hxx:374
ROOT::RDF::RInterface::Profile2D
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
Definition: RInterface.hxx:1601
ROOT::RDF::TProfile2DModel::GetProfile
std::shared_ptr<::TProfile2D > GetProfile() const
Definition: RDFHistoModels.cxx:314
ROOT::DisableImplicitMT
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:541
F
#define F(x, y, z)
tree
Definition: tree.py:1
ROOT::RDF::RInterface::Alias
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
Definition: RInterface.hxx:492
ROOT::RDF::RInterface::Range
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
Definition: RInterface.hxx:821
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
Definition: RInterface.hxx:1154
RBookedDefines.hxx
r
ROOT::R::TRInterface & r
Definition: Object.C:4
ROOT::RDF::RInterface::Profile2D
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
Definition: RInterface.hxx:1623
ROOT::Internal::RDF::RBookedDefines::GetNames
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns.
Definition: RBookedDefines.hxx:81
ROOT::RDF::RInterface::fDataSource
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:110
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
Definition: RInterface.hxx:1094
string_view
basic_string_view< char > string_view
Definition: libcpp_string_view.h:786
ROOT::RDF::TH2DModel
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:45
ROOT::TypeTraits::TypeList
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25
Utils.hxx
kError
const Int_t kError
Definition: TError.h:46
ROOT::Detail::RDF::RRangeBase
Definition: RRangeBase.hxx:32
TGeant4Unit::s
static constexpr double s
Definition: TGeant4SystemOfUnits.h:162
ROOT::RDF::RInterface::Histo3D
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
Definition: RInterface.hxx:1352
InterfaceUtils.hxx
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
Definition: RInterface.hxx:1174
ROOT::RDF::RInterface::AddDefaultColumns
void AddDefaultColumns()
Definition: RInterface.hxx:2523
ROOT::RDF::RInterface::Filter
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:252
ROOT::RDF::RInterface::Snapshot
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:574
ROOT::RDF::RInterface::GetFilterNames
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
Definition: RInterface.hxx:2109
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
Definition: RInterface.hxx:1058
ROOT::RDF::TH2DModel::GetHistogram
std::shared_ptr<::TH2D > GetHistogram() const
Definition: RDFHistoModels.cxx:150
ROOT::RDF::RInterface::Histo1D
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
Definition: RInterface.hxx:1120
ROOT::RDF::RInterface::Sum
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
Definition: RInterface.hxx:1886
ROOT::Internal::RDF::RBookedDefines
Encapsulates the columns defined by the user.
Definition: RBookedDefines.hxx:39
ROOT::RDF::RInterface::Report
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
Definition: RInterface.hxx:1919
TDirectory::TContext
TDirectory::TContext keeps track and restore the current directory.
Definition: TDirectory.h:89
ROOT::Detail::RDF::RRange
Definition: RRange.hxx:36
RooFitShortHand::S
RooArgSet S(const RooAbsArg &v1)
Definition: RooGlobalFunc.cxx:390
ROOT::RDF::RSnapshotOptions
A collection of options to steer the creation of the dataset on file.
Definition: RSnapshotOptions.hxx:22
ROOT::RDF::RInterface::Snapshot
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:632
ActionHelpers.hxx
ROOT::RDF::RInterface::Filter
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:232
ROOT::RDF::RInterface::GetNRuns
unsigned int GetNRuns() const
Gets the number of event loops run.
Definition: RInterface.hxx:2200
TStatistic.h
ROOT::RDF::TProfile1DModel
A struct which stores the parameters of a TProfile.
Definition: HistoModels.hxx:99
ROOT::Internal::RDF::RBookedDefines::AddColumn
void AddColumn(const std::shared_ptr< RDFDetail::RDefineBase > &column, std::string_view name)
Add a new booked column.
Definition: RDFBookedDefines.cxx:21
ROOT::Internal::RDF::RBookedDefines::HasName
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
Definition: RDFBookedDefines.cxx:15
TProfile.h
ROOT::RDataFrame
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree,...
Definition: RDataFrame.hxx:42
ROOT::RDF::RInterface::CreateAction
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which all column typ...
Definition: RInterface.hxx:2584
ROOT::RDF::RInterface::Snapshot
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:667
ROOT::Detail::RDF::RLoopManager::GetDataSource
RDataSource * GetDataSource() const
Definition: RLoopManager.hxx:159
ROOT::RDF::RInterface::Range
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
Definition: RInterface.hxx:843
ROOT::RDF::RNode
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
Definition: InterfaceUtils.hxx:55
ROOT::Detail::RDF::RLoopManager::ToJitExec
void ToJitExec(const std::string &) const
Definition: RLoopManager.cxx:780
ROOT::RDF::RResultPtr
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:95
RRange.hxx
ROOT::RDF::RInterface::Histo2D
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
Definition: RInterface.hxx:1269
ROOT::Detail::RDF
Definition: GraphUtils.hxx:28
ROOT::RDF::RDataSource::GetLabel
virtual std::string GetLabel()
Return a string representation of the datasource type.
Definition: RDataSource.hxx:223
ROOT::Detail::RDF::ColumnNames_t
std::vector< std::string > ColumnNames_t
Definition: RLoopManager.hxx:54
ROOT::RDF::RDataSource
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
Definition: RDataSource.hxx:106
ROOT::Internal::RDF::BookFilterJit
void BookFilterJit(const std::shared_ptr< RJittedFilter > &jittedFilter, std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const RBookedDefines &customCols, TTree *tree, RDataSource *ds)
Definition: RDFInterfaceUtils.cxx:627
ROOT::RDF::RInterface::Display
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", const int &nRows=5)
Provides a representation of the columns in the dataset.
Definition: RInterface.hxx:2502
ROOT::RDF::RInterface::GetColumnTypeNamesList
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
Definition: RInterface.hxx:2557
ROOT::Internal::RDF::JitBuildAction
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RBookedDefines &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
Definition: RDFInterfaceUtils.cxx:719
ROOT::Detail::RDF::RLoopManager::GetDSValuePtrs
const std::map< std::string, std::vector< void * > > & GetDSValuePtrs() const
Definition: RLoopManager.hxx:180
ROOT::RDF::RInterface::RInterface
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
Rgl::Range_t
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1195
ROOT::RDF::RInterface< RDFDetail::RLoopManager >::DS_t
void DS_t
Definition: RInterface.hxx:95
Helper
ROOT::EnableImplicitMT
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:524
RDataSource.hxx
TProfile2D.h
ROOT::RDF::RInterface::Aggregate
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
Definition: RInterface.hxx:2378
ROOT::RDF::RLazyDS
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
ROOT::Internal::RDF::ColumnName2ColumnTypeName
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, RDataSource *ds, RDefineBase *define, bool vector2rvec)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:224
ROOT::Detail::RDF::MakeResultPtr
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create a RResultPtr and set its pointer to the corresponding RAction This overload is invoked by non-...
Definition: RResultPtr.hxx:418
ROOT::RDF::TH3DModel
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:70
ROOT::Internal::RDF::CheckForDefinition
void CheckForDefinition(const std::string &where, std::string_view definedColView, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
Definition: RDFInterfaceUtils.cxx:509
ROOT::RDF::RInterface::RInterface
RInterface(RInterface &&)=default
Move-ctor for RInterface.
ROOT::RDF::RInterface::Histo3D
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
Definition: RInterface.hxx:1371
RLazyDSImpl.hxx
ROOT::RDF::RInterface::fProxiedPtr
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:106
RIntegerSequence.hxx
ROOT::RDF::RInterface::Take
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
Definition: RInterface.hxx:1015
ROOT::Detail::RDF::RDefineBase
Definition: RDefineBase.hxx:34
TChain.h
ROOT::Internal::RDF::GetTopLevelBranchNames
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branch names, including those of the friend trees.
Definition: RDFInterfaceUtils.cxx:416
ROOT::RDF::RInterface::Display
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
Definition: RInterface.hxx:2469
ROOT::RDF::RInterface::GetValidatedColumnNames
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
Definition: RInterface.hxx:2779
ROOT::RDF::RInterface::Describe
std::string Describe()
Return information about the dataframe.
Definition: RInterface.hxx:2034
ROOT::RDF::RInterface::CacheImpl
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
Definition: RInterface.hxx:2727
ROOT::Detail::RDF::RLoopManager::GetBranchNames
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
Definition: RLoopManager.cxx:840
ROOT::RDF::RInterface::Redefine
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
Definition: RInterface.hxx:405
ROOT::Internal::RDF::RAction
An RDataFrame node that produces a result.
Definition: RAction.hxx:52
h
#define h(i)
Definition: RSha256.hxx:106
ROOT::RDF::RSnapshotOptions::fLazy
bool fLazy
Do not start the event loop when Snapshot is called.
Definition: RSnapshotOptions.hxx:38
ROOT::RDF::RInterface::GetColumnNames
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
Definition: RInterface.hxx:1954
ROOT::RDF::RInterface::DefineSlotEntry
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:356
ROOT::Detail::RDF::RLoopManager::GetTree
TTree * GetTree() const
Definition: RLoopManager.cxx:726
ROOT::RDF::RInterface::operator=
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RStringView.hxx
ROOT::RDF::RInterface::Min
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
Definition: RInterface.hxx:1767
ROOT::Internal::RDF::PrettyPrintAddr
std::string PrettyPrintAddr(const void *const addr)
Definition: RDFInterfaceUtils.cxx:619
ROOT::RDF::TProfile1DModel::GetProfile
std::shared_ptr<::TProfile > GetProfile() const
Definition: RDFHistoModels.cxx:254
ROOT::Internal::TreeUtils::GetFileNamesFromTree
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
Get and store the file names associated with the input tree.
Definition: InternalTreeUtils.cxx:30
ROOT::RDF::RInterface::Reduce
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:964
ROOT::RDF::RInterface::Cache
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
Definition: RInterface.hxx:772
TypeTraits.hxx
ROOT::Detail::RDF::RLoopManager::Book
void Book(RDFInternal::RActionBase *actionPtr)
Definition: RLoopManager.cxx:731
UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:46
ROOT::RDF::RInterface::Book
RResultPtr< typename Helper::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
Definition: RInterface.hxx:2427
ROOT::RDF::RInterface::Cache
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
Definition: RInterface.hxx:797
ROOT::RDF::RInterface::Profile2D
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
Definition: RInterface.hxx:1553
ROOT::Internal::RDF::IsInternalColumn
bool IsInternalColumn(std::string_view colName)
Definition: RDFUtils.cxx:349
TH2.h
ROOT::Detail::RDF::RLoopManager::AddDataBlockCallback
void AddDataBlockCallback(std::function< void(unsigned int)> &&callback)
Definition: RLoopManager.cxx:858
ROOT::RDF::RInterface::RInterface
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RBookedDefines &columns, RDataSource *ds)
Definition: RInterface.hxx:2769
ROOT::RDF::RInterface
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:94
ROOT::Detail::RDF::RLoopManager::GetNEmptyEntries
ULong64_t GetNEmptyEntries() const
Definition: RLoopManager.hxx:158
ROOT::RDF::RInterface::Histo3D
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
Definition: RInterface.hxx:1303
ROOT::RDF::RInterface::fDefines
RDFInternal::RBookedDefines fDefines
Contains the custom columns defined up to this node.
Definition: RInterface.hxx:113
TH3.h
ROOT::Internal::RDF::RBookedDefines::GetColumns
const RDefineBasePtrMap_t & GetColumns() const
Returns the list of the pointers to the defined columns.
Definition: RBookedDefines.hxx:85
ROOT::RDF::RInterface::DefineImpl
std::enable_if< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
Definition: RInterface.hxx:2636
ULong64_t
unsigned long long ULong64_t
Definition: RtypesCore.h:81
ROOT::IsImplicitMTEnabled
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:555
ROOT::Internal::RDF::GetValidatedColumnNames
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validDefines, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
Definition: RDFInterfaceUtils.cxx:788
ROOT::RDF::RInterface::Display
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
Definition: RInterface.hxx:2516
ROOT::RDF::RInterface::Fill
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
Definition: RInterface.hxx:1663
ROOT::RDF::RInterface::DefineSlot
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
Definition: RInterface.hxx:326
TGraph
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
ROOT::RDF::RInterface::HasColumn
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
Definition: RInterface.hxx:2154
RtypesCore.h
ROOT::Detail::RDF::RDefine
Definition: RDefine.hxx:45
ROOT::RDF::RInterface::Snapshot
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:592
ROOT::RDF::RInterface::GetColumnType
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
Definition: RInterface.hxx:1996
ROOT::RDF::RInterface::GetDefinedColumnNames
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
Definition: RInterface.hxx:2126
ROOT::Internal::RDF::RBookedDefines::AddName
void AddName(std::string_view name)
Add a new name to the list returned by GetNames without booking a new column.
Definition: RDFBookedDefines.cxx:30
ROOT::RDF::RInterface::CallCreateActionWithoutColsIfPossible
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >
Definition: RInterface.hxx:2748
ROOT::RDF::RInterface::RedefineSlotEntry
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column, possibly overriding an existing one with the same name.
Definition: RInterface.hxx:441
ROOT::RDF::RInterface::Count
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
Definition: RInterface.hxx:982
ROOT::Internal::RDF::GetValidatedArgTypes
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RBookedDefines &defines, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
Definition: RDFInterfaceUtils.cxx:817
ROOT::RDF::RInterface::Reduce
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:941
ROOT::Detail::RDF::RLoopManager::AddColumnAlias
void AddColumnAlias(const std::string &alias, const std::string &colName)
Definition: RLoopManager.hxx:175
ROOT::Detail::RDF::RFilter
Definition: RFilter.hxx:53
graph
Definition: graph.py:1
ROOT::RDF::RInterface::Max
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
Definition: RInterface.hxx:1797
ROOT::Internal::RDF::CheckForDuplicateSnapshotColumns
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
Definition: RDFInterfaceUtils.cxx:850
ROOT::Internal::RDF
Definition: RArrowDS.hxx:23
HistoModels.hxx
ROOT::Detail::RDF::RLoopManager::Jit
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
Definition: RLoopManager.cxx:652
ROOT::Detail::RDF::RLoopManager::GetAliasMap
const std::map< std::string, std::string > & GetAliasMap() const
Definition: RLoopManager.hxx:176
name
char name[80]
Definition: TGX11.cxx:110
ROOT::RDF::RInterface::Display
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
Definition: RInterface.hxx:2485
ROOT::RDF::RInterface::RInterface
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>.
Definition: RInterface.hxx:131
ROOT::RDF::RInterface::CheckIMTDisabled
void CheckIMTDisabled(std::string_view callerName)
Definition: RInterface.hxx:2567
ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:34
ROOT::Internal::RDF::ResolveAlias
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
Definition: RDFInterfaceUtils.cxx:373
ROOT::Detail::RDF::RLoopManager::GetNRuns
unsigned int GetNRuns() const
Definition: RLoopManager.hxx:178
ROOT::Internal::RDF::UpcastNode
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
Definition: RDFInterfaceUtils.cxx:778
ROOT::RDF::RInterface::fLoopManager
RLoopManager * fLoopManager
Definition: RInterface.hxx:108
ROOT::RDF::RInterface::GetProxiedPtr
const std::shared_ptr< Proxied > & GetProxiedPtr() const
Definition: RInterface.hxx:2777
ROOT::Internal::RDF::GetFilterNames
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
Definition: RDFInterfaceUtils.cxx:601
ROOT::RDF::RInterface::Mean
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
Definition: RInterface.hxx:1826
ROOT::RDF::RInterface::Profile1D
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
Definition: RInterface.hxx:1519
TChain
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
ROOT::Internal::RDF::AtLeastOneEmptyString
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
Definition: RDFInterfaceUtils.cxx:769
ROOT::RDF::RInterface::SnapshotImpl
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
Definition: RInterface.hxx:2697
ROOT::RDF::RInterface< RDFDetail::RLoopManager >::ColumnNames_t
RDFDetail::ColumnNames_t ColumnNames_t
Definition: RInterface.hxx:96
type
int type
Definition: TGX11.cxx:121
ROOT::RDF::RInterface::Cache
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:711
ROOT::RDF::RInterface::GetLoopManager
RLoopManager * GetLoopManager() const
Definition: RInterface.hxx:2775
TStatistic
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
ROOT::RDF::RInterface::CallCreateActionWithoutColsIfPossible
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
Definition: RInterface.hxx:2758
ROOT::RDF::RInterface::RedefineSlot
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column, possibly overriding an existing one with the same name.
Definition: RInterface.hxx:423
ROOT::RDF::RInterface::CheckAndFillDSColumns
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
Definition: RInterface.hxx:2785
ROOT::RDF::RInterface::Profile1D
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
Definition: RInterface.hxx:1452
ROOT::RDF::TH1DModel::GetHistogram
std::shared_ptr<::TH1D > GetHistogram() const
Definition: RDFHistoModels.cxx:98
ROOT::RDF::RInterface::Foreach
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
Definition: RInterface.hxx:863
ROOT::RDF::RInterface::Graph
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action).
Definition: RInterface.hxx:1405
ROOT::Internal::RDF::GetBranchNames
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branch names, including those of the friend trees.
Definition: RLoopManager.cxx:320
ROOT::RDF::RInterface::Filter
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:192
ROOT::Internal::RDF::TypeID2TypeName
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: RDFUtils.cxx:99
ROOT::Detail::RDF::RLoopManager::GetNSlots
unsigned int GetNSlots() const
Definition: RLoopManager.hxx:167
TH1.h
ROOT::RDF::RInterface::Filter
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:216
ROOT
tbb::task_arena is an alias of tbb::interface7::task_arena, which does not allow forward-declaring tb...
Definition: EExecutionPolicy.hxx:4
RResultPtr.hxx
ROOT::Internal::RDF::BookDefineJit
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RBookedDefines &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Definition: RDFInterfaceUtils.cxx:673
ROOT::RDF::RInterface::ForeachSlot
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
Definition: RInterface.hxx:893
ROOT::RDF::RDataSource::HasColumn
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
ROOT::RDF::TProfile2DModel
A struct which stores the parameters of a TProfile2D.
Definition: HistoModels.hxx:124
ROOT::RDF::RInterface::Stats
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
Definition: RInterface.hxx:1721
ROOT::Detail::RDF::RLoopManager
The head node of an RDF computation graph.
Definition: RLoopManager.hxx:58
TClassEdit::DemangleTypeIdName
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
Definition: TClassEdit.cxx:2128
int
ROOT::RDF::TH1DModel
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
ROOT::RDF::RInterface::Histo2D
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
Definition: RInterface.hxx:1208
ROOT::Internal::RDF::GetColumnWidth
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace)
Definition: RDFUtils.cxx:358
ROOT::RDF::RInterface::Histo2D
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
Definition: RInterface.hxx:1251