Logo ROOT  
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
19#include "ROOT/RDF/RRange.hxx"
20#include "ROOT/RDF/Utils.hxx"
23#include "ROOT/RResultPtr.hxx"
25#include "ROOT/RStringView.hxx"
26#include "ROOT/TypeTraits.hxx"
27#include "RtypesCore.h" // for ULong64_t
28#include "TH1.h" // For Histo actions
29#include "TH2.h" // For Histo actions
30#include "TH3.h" // For Histo actions
31#include "TProfile.h"
32#include "TProfile2D.h"
33#include "TStatistic.h"
34
35#include <algorithm>
36#include <cstddef>
37#include <initializer_list>
38#include <limits>
39#include <memory>
40#include <sstream>
41#include <stdexcept>
42#include <string>
43#include <type_traits> // is_same, enable_if
44#include <typeinfo>
45#include <vector>
46
47class TGraph;
48
49// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
50namespace ROOT {
53void EnableImplicitMT(UInt_t numthreads);
54class RDataFrame;
55namespace Internal {
56namespace RDF {
58}
59}
60}
61namespace cling {
62std::string printValue(ROOT::RDataFrame *tdf);
63}
64
65namespace ROOT {
66namespace RDF {
69namespace TTraits = ROOT::TypeTraits;
70
71template <typename Proxied, typename DataSource>
72class RInterface;
73
74using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
75
76// clang-format off
77/**
78 * \class ROOT::RDF::RInterface
79 * \ingroup dataframe
80 * \brief The public interface to the RDataFrame federation of classes
81 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
82 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
83 *
84 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
85 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
86 */
87// clang-format on
88template <typename Proxied, typename DataSource = void>
90 using DS_t = DataSource;
95 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
97
98 template <typename T, typename W>
99 friend class RInterface;
100
101 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
102 ///< The RLoopManager at the root of this computation graph. Never null.
104 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
106
107 /// Contains the custom columns defined up to this node.
109
110public:
111 ////////////////////////////////////////////////////////////////////////////
112 /// \brief Copy-assignment operator for RInterface.
113 RInterface &operator=(const RInterface &) = default;
114
115 ////////////////////////////////////////////////////////////////////////////
116 /// \brief Copy-ctor for RInterface.
117 RInterface(const RInterface &) = default;
118
119 ////////////////////////////////////////////////////////////////////////////
120 /// \brief Move-ctor for RInterface.
121 RInterface(RInterface &&) = default;
122
123 ////////////////////////////////////////////////////////////////////////////
124 /// \brief Only enabled when building a RInterface<RLoopManager>
125 template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0>
126 RInterface(const std::shared_ptr<Proxied> &proxied)
127 : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource())
128 {
130 }
131
132 ////////////////////////////////////////////////////////////////////////////
133 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
134 /// Different RDataFrame methods return different C++ types. All nodes, however,
135 /// can be cast to this common type at the cost of a small performance penalty.
136 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
137 /// around via (non-template, C++11) helper functions.
138 /// Example usage:
139 /// ~~~{.cpp}
140 /// // a function that conditionally adds a Range to a RDataFrame node.
141 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
142 /// {
143 /// return mustAddRange ? df.Range(1) : df;
144 /// }
145 /// // use as :
146 /// ROOT::RDataFrame df(10);
147 /// auto maybeRanged = MaybeAddRange(df, true);
148 /// ~~~
149 /// Note that it is not a problem to pass RNode's by value.
150 operator RNode() const
151 {
152 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fCustomColumns,
154 }
155
156 ////////////////////////////////////////////////////////////////////////////
157 /// \brief Append a filter to the call graph.
158 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
159 /// signalling whether the event has passed the selection (true) or not (false).
160 /// \param[in] columns Names of the columns/branches in input to the filter function.
161 /// \param[in] name Optional name of this filter. See `Report`.
162 /// \return the filter node of the computation graph.
163 ///
164 /// Append a filter node at the point of the call graph corresponding to the
165 /// object this method is called on.
166 /// The callable `f` should not have side-effects (e.g. modification of an
167 /// external or static variable) to ensure correct results when implicit
168 /// multi-threading is active.
169 ///
170 /// RDataFrame only evaluates filters when necessary: if multiple filters
171 /// are chained one after another, they are executed in order and the first
172 /// one returning false causes the event to be discarded.
173 /// Even if multiple actions or transformations depend on the same filter,
174 /// it is executed once per entry. If its result is requested more than
175 /// once, the cached result is served.
176 ///
177 /// ### Example usage:
178 /// ~~~{.cpp}
179 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
180 /// auto filtered = df.Filter(myCut, {"x", "y"});
181 ///
182 /// // String: it must contain valid C++ except that column names can be used instead of variable names
183 /// auto filtered = df.Filter("x*y > 0");
184 /// ~~~
185 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
187 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
188 {
189 RDFInternal::CheckFilter(f);
190 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
191 constexpr auto nColumns = ColTypes_t::list_size;
192 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
193 const auto newColumns =
194 CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
195
197
198 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, newColumns, name);
199 fLoopManager->Book(filterPtr.get());
200 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, newColumns, fDataSource);
201 }
202
203 ////////////////////////////////////////////////////////////////////////////
204 /// \brief Append a filter to the call graph.
205 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
206 /// signalling whether the event has passed the selection (true) or not (false).
207 /// \param[in] name Optional name of this filter. See `Report`.
208 /// \return the filter node of the computation graph.
209 ///
210 /// Refer to the first overload of this method for the full documentation.
211 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
213 {
214 // The sfinae is there in order to pick up the overloaded method which accepts two strings
215 // rather than this template method.
216 return Filter(f, {}, name);
217 }
218
219 ////////////////////////////////////////////////////////////////////////////
220 /// \brief Append a filter to the call graph.
221 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
222 /// signalling whether the event has passed the selection (true) or not (false).
223 /// \param[in] columns Names of the columns/branches in input to the filter function.
224 /// \return the filter node of the computation graph.
225 ///
226 /// Refer to the first overload of this method for the full documentation.
227 template <typename F>
228 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
229 {
230 return Filter(f, ColumnNames_t{columns});
231 }
232
233 ////////////////////////////////////////////////////////////////////////////
234 /// \brief Append a filter to the call graph.
235 /// \param[in] expression The filter expression in C++
236 /// \param[in] name Optional name of this filter. See `Report`.
237 /// \return the filter node of the computation graph.
238 ///
239 /// The expression is just-in-time compiled and used to filter entries. It must
240 /// be valid C++ syntax in which variable names are substituted with the names
241 /// of branches/columns.
242 ///
243 /// ### Example usage:
244 /// ~~~{.cpp}
245 /// auto filtered_df = df.Filter("myCollection.size() > 3");
246 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minumum collection size");
247 /// ~~~
249 {
250 // deleted by the jitted call to JitFilterHelper
251 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
252 using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
253 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
254 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
255
256 RDFInternal::BookFilterJit(jittedFilter.get(), upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
259
260 fLoopManager->Book(jittedFilter.get());
263 }
264
265 // clang-format off
266 ////////////////////////////////////////////////////////////////////////////
267 /// \brief Creates a custom column
268 /// \param[in] name The name of the custom column.
269 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
270 /// \param[in] columns Names of the columns/branches in input to the producer function.
271 /// \return the first node of the computation graph for which the new quantity is defined.
272 ///
273 /// Create a custom column that will be visible from all subsequent nodes
274 /// of the functional chain. The `expression` is only evaluated for entries that pass
275 /// all the preceding filters.
276 /// A new variable is created called `name`, accessible as if it was contained
277 /// in the dataset from subsequent transformations/actions.
278 ///
279 /// Use cases include:
280 /// * caching the results of complex calculations for easy and efficient multiple access
281 /// * extraction of quantities of interest from complex objects
282 ///
283 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
284 ///
285 /// ### Example usage:
286 /// ~~~{.cpp}
287 /// // assuming a function with signature:
288 /// double myComplexCalculation(const RVec<float> &muon_pts);
289 /// // we can pass it directly to Define
290 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
291 /// // alternatively, we can pass the body of the function as a string, as in Filter:
292 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
293 /// ~~~
294 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
296 {
297 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns);
298 }
299 // clang-format on
300
301 // clang-format off
302 ////////////////////////////////////////////////////////////////////////////
303 /// \brief Creates a custom column with a value dependent on the processing slot.
304 /// \param[in] name The name of the custom column.
305 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
306 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
307 /// \return the first node of the computation graph for which the new quantity is defined.
308 ///
309 /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
310 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
311 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
312 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
313 /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1.
314 ///
315 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
316 /// ~~~{.cpp}
317 /// int function(unsigned int, double, double);
318 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
319 /// df.DefineSlot("x", function, {"column1", "column2"})
320 /// ~~~
321 ///
322 /// See Define for more information.
323 template <typename F>
325 {
326 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns);
327 }
328 // clang-format on
329
330 // clang-format off
331 ////////////////////////////////////////////////////////////////////////////
332 /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
333 /// \param[in] name The name of the custom column.
334 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
335 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
336 /// \return the first node of the computation graph for which the new quantity is defined.
337 ///
338 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
339 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
340 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
341 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
342 /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1. The second parameter
343 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
344 ///
345 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
346 /// ~~~{.cpp}
347 /// int function(unsigned int, ULong64_t, double, double);
348 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
349 /// DefineSlotEntry("x", function, {"column1", "column2"})
350 /// ~~~
351 ///
352 /// See Define for more information.
353 template <typename F>
355 {
356 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns);
357 }
358 // clang-format on
359
360 ////////////////////////////////////////////////////////////////////////////
361 /// \brief Creates a custom column
362 /// \param[in] name The name of the custom column.
363 /// \param[in] expression An expression in C++ which represents the temporary value
364 /// \return the first node of the computation graph for which the new quantity is defined.
365 ///
366 /// The expression is just-in-time compiled and used to produce the column entries.
367 /// It must be valid C++ syntax in which variable names are substituted with the names
368 /// of branches/columns.
369 ///
370 /// Refer to the first overload of this method for the full documentation.
372 {
373 // this check must be done before jitting lest we throw exceptions in jitted code
377
378 auto jittedCustomColumn =
379 std::make_shared<RDFDetail::RJittedCustomColumn>(fLoopManager, name, fLoopManager->GetNSlots());
380
381 RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, jittedCustomColumn, fCustomColumns,
383
385 newCols.AddName(name);
386 newCols.AddColumn(jittedCustomColumn, name);
387
388 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
389
390 return newInterface;
391 }
392
393 ////////////////////////////////////////////////////////////////////////////
394 /// \brief Allow to refer to a column with a different name
395 /// \param[in] alias name of the column alias
396 /// \param[in] columnName of the column to be aliased
397 /// \return the first node of the computation graph for which the alias is available.
398 ///
399 /// Aliasing an alias is supported.
400 ///
401 /// ### Example usage:
402 /// ~~~{.cpp}
403 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
404 /// ~~~
406 {
407 // The symmetry with Define is clear. We want to:
408 // - Create globally the alias and return this very node, unchanged
409 // - Make aliases accessible based on chains and not globally
410
411 // Helper to find out if a name is a column
412 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
413
414 // If the alias name is a column name, there is a problem
416 fLoopManager->GetAliasMap(), dsColumnNames);
417
418 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
419
420 fLoopManager->AddColumnAlias(std::string(alias), validColumnName);
421
423
424 newCols.AddName(alias);
425 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
426
427 return newInterface;
428 }
429
430 ////////////////////////////////////////////////////////////////////////////
431 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
432 /// \tparam ColumnTypes variadic list of branch/column types.
433 /// \param[in] treename The name of the output TTree.
434 /// \param[in] filename The name of the output TFile.
435 /// \param[in] columnList The list of names of the columns/branches to be written.
436 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
437 /// \return a `RDataFrame` that wraps the snapshotted dataset.
438 ///
439 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
440 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
441 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
442 /// written out and it appears before the array in the columnList.
443 ///
444 /// ### Example invocations:
445 ///
446 /// ~~~{.cpp}
447 /// // without specifying template parameters (column types automatically deduced)
448 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
449 ///
450 /// // specifying template parameters ("x" is `int`, "y" is `float`)
451 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
452 /// ~~~
453 ///
454 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
455 /// `RSnapshotOptions`:
456 /// ~~~{.cpp}
457 /// RSnapshotOptions opts;
458 /// opts.fLazy = true;
459 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
460 /// ~~~
461 template <typename... ColumnTypes>
463 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
464 const RSnapshotOptions &options = RSnapshotOptions())
465 {
466 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
467 }
468
469 ////////////////////////////////////////////////////////////////////////////
470 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
471 /// \param[in] treename The name of the output TTree.
472 /// \param[in] filename The name of the output TFile.
473 /// \param[in] columnList The list of names of the columns/branches to be written.
474 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
475 /// \return a `RDataFrame` that wraps the snapshotted dataset.
476 ///
477 /// This function returns a `RDataFrame` built with the output tree as a source.
478 /// The types of the columns are automatically inferred and do not need to be specified.
479 ///
480 /// See above for a more complete description and example usages.
482 const ColumnNames_t &columnList,
483 const RSnapshotOptions &options = RSnapshotOptions())
484 {
485 // Early return: if the list of columns is empty, just return an empty RDF
486 // If we proceed, the jitted call will not compile!
487 if (columnList.empty()) {
488 auto nEntries = *this->Count();
489 auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries));
490 return MakeResultPtr(snapshotRDF, *fLoopManager, nullptr);
491 }
492 auto tree = fLoopManager->GetTree();
493 const auto nsID = fLoopManager->GetID();
494 std::stringstream snapCall;
495 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
496 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
498
499 // build a string equivalent to
500 // "resPtr = (RInterface<nodetype*>*)(this)->Snapshot<Ts...>(args...)"
502 snapCall << "*reinterpret_cast<ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>>*>("
504 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
505 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Snapshot<";
506
507 const auto &customCols = fCustomColumns.GetNames();
508 const auto dontConvertVector = false;
509
510 const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
511
512 for (auto &c : validColumnNames) {
513 const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
514 const auto customColID = isCustom ? fCustomColumns.GetColumns().at(c)->GetID() : 0;
515 snapCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom, dontConvertVector,
516 customColID)
517 << ", ";
518 };
519 if (!columnList.empty())
520 snapCall.seekp(-2, snapCall.cur); // remove the last ",
521 snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
522 << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
523 << RDFInternal::PrettyPrintAddr(&columnList) << "),"
524 << "*reinterpret_cast<ROOT::RDF::RSnapshotOptions*>(" << RDFInternal::PrettyPrintAddr(&options) << "));";
525 // jit snapCall, return result
526 fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
527 RDFInternal::InterpreterCalc(snapCall.str(), "Snapshot");
528 return resPtr;
529 }
530
531 // clang-format off
532 ////////////////////////////////////////////////////////////////////////////
533 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
534 /// \param[in] treename The name of the output TTree.
535 /// \param[in] filename The name of the output TFile.
536 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
537 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
538 /// \return a `RDataFrame` that wraps the snapshotted dataset.
539 ///
540 /// This function returns a `RDataFrame` built with the output tree as a source.
541 /// The types of the columns are automatically inferred and do not need to be specified.
542 ///
543 /// See above for a more complete description and example usages.
545 std::string_view columnNameRegexp = "",
546 const RSnapshotOptions &options = RSnapshotOptions())
547 {
551 columnNameRegexp,
552 "Snapshot");
553 return Snapshot(treename, filename, selectedColumns, options);
554 }
555 // clang-format on
556
557 // clang-format off
558 ////////////////////////////////////////////////////////////////////////////
559 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
560 /// \param[in] treename The name of the output TTree.
561 /// \param[in] filename The name of the output TFile.
562 /// \param[in] columnList The list of names of the columns/branches to be written.
563 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
564 /// \return a `RDataFrame` that wraps the snapshotted dataset.
565 ///
566 /// This function returns a `RDataFrame` built with the output tree as a source.
567 /// The types of the columns are automatically inferred and do not need to be specified.
568 ///
569 /// See above for a more complete description and example usages.
571 std::initializer_list<std::string> columnList,
572 const RSnapshotOptions &options = RSnapshotOptions())
573 {
574 ColumnNames_t selectedColumns(columnList);
575 return Snapshot(treename, filename, selectedColumns, options);
576 }
577 // clang-format on
578
579 ////////////////////////////////////////////////////////////////////////////
580 /// \brief Save selected columns in memory
581 /// \tparam ColumnTypes variadic list of branch/column types.
582 /// \param[in] columns to be cached in memory.
583 /// \return a `RDataFrame` that wraps the cached dataset.
584 ///
585 /// This action returns a new `RDataFrame` object, completely detached from
586 /// the originating `RDataFrame`. The new dataframe only contains the cached
587 /// columns and stores their content in memory for fast, zero-copy subsequent access.
588 ///
589 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
590 /// fits in memory and that will be accessed many times.
591 ///
592 /// ### Example usage:
593 ///
594 /// **Types and columns specified:**
595 /// ~~~{.cpp}
596 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
597 /// ~~~
598 ///
599 /// **Types inferred and columns specified (this invocation relies on jitting):**
600 /// ~~~{.cpp}
601 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
602 /// ~~~
603 ///
604 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
605 /// ~~~{.cpp}
606 /// auto cache_all_cols_df = df.Cache(myRegexp);
607 /// ~~~
608 template <typename... ColumnTypes>
610 {
611 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
612 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
613 }
614
615 ////////////////////////////////////////////////////////////////////////////
616 /// \brief Save selected columns in memory
617 /// \param[in] columns to be cached in memory
618 /// \return a `RDataFrame` that wraps the cached dataset.
619 ///
620 /// See the previous overloads for more information.
622 {
623 // Early return: if the list of columns is empty, just return an empty RDF
624 // If we proceed, the jitted call will not compile!
625 if (columnList.empty()) {
626 auto nEntries = *this->Count();
627 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
628 return emptyRDF;
629 }
630
631 auto tree = fLoopManager->GetTree();
632 const auto nsID = fLoopManager->GetID();
633 std::stringstream cacheCall;
634 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
635 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
637 // build a string equivalent to
638 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
639 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
640 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
642 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
643 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
644
645 const auto &customCols = fCustomColumns.GetNames();
646 for (auto &c : columnList) {
647 const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
648 const auto customColID = isCustom ? fCustomColumns.GetColumns().at(c)->GetID() : 0;
649 cacheCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom,
650 /*vector2rvec=*/true, customColID)
651 << ", ";
652 };
653 if (!columnList.empty())
654 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
655 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
656 << RDFInternal::PrettyPrintAddr(&columnList) << "));";
657 // jit cacheCall, return result
658 fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
659 RDFInternal::InterpreterCalc(cacheCall.str(), "Cache");
660 return resRDF;
661 }
662
663 ////////////////////////////////////////////////////////////////////////////
664 /// \brief Save selected columns in memory
665 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
666 /// \return a `RDataFrame` that wraps the cached dataset.
667 ///
668 /// The existing columns are matched against the regular expression. If the string provided
669 /// is empty, all columns are selected. See the previous overloads for more information.
671 {
672
674 columnNameRegexp, "Cache");
675 return Cache(selectedColumns);
676 }
677
678 ////////////////////////////////////////////////////////////////////////////
679 /// \brief Save selected columns in memory
680 /// \param[in] columns to be cached in memory.
681 /// \return a `RDataFrame` that wraps the cached dataset.
682 ///
683 /// See the previous overloads for more information.
684 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
685 {
686 ColumnNames_t selectedColumns(columnList);
687 return Cache(selectedColumns);
688 }
689
690 // clang-format off
691 ////////////////////////////////////////////////////////////////////////////
692 /// \brief Creates a node that filters entries based on range: [begin, end)
693 /// \param[in] begin Initial entry number considered for this range.
694 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
695 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
696 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
697 ///
698 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
699 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
700 ///
701 /// ### Example usage:
702 /// ~~~{.cpp}
703 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
704 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
705 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
706 /// ~~~
707 // clang-format on
708 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
709 {
710 // check invariants
711 if (stride == 0 || (end != 0 && end < begin))
712 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
713 CheckIMTDisabled("Range");
714
716 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
717 fLoopManager->Book(rangePtr.get());
719 return tdf_r;
720 }
721
722 // clang-format off
723 ////////////////////////////////////////////////////////////////////////////
724 /// \brief Creates a node that filters entries based on range
725 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
726 /// \return a node of the computation graph for which the range is defined.
727 ///
728 /// See the other Range overload for a detailed description.
729 // clang-format on
730 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
731
732 // clang-format off
733 ////////////////////////////////////////////////////////////////////////////
734 /// \brief Execute a user-defined function on each entry (*instant action*)
735 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
736 /// \param[in] columns Names of the columns/branches in input to the user function.
737 ///
738 /// The callable `f` is invoked once per entry. This is an *instant action*:
739 /// upon invocation, an event loop as well as execution of all scheduled actions
740 /// is triggered.
741 /// Users are responsible for the thread-safety of this callable when executing
742 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
743 ///
744 /// ### Example usage:
745 /// ~~~{.cpp}
746 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
747 /// ~~~
748 // clang-format on
749 template <typename F>
750 void Foreach(F f, const ColumnNames_t &columns = {})
751 {
752 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
753 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
754 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
755 }
756
757 // clang-format off
758 ////////////////////////////////////////////////////////////////////////////
759 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
760 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
761 /// \param[in] columns Names of the columns/branches in input to the user function.
762 ///
763 /// Same as `Foreach`, but the user-defined function takes an extra
764 /// `unsigned int` as its first parameter, the *processing slot index*.
765 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
766 /// for each thread of execution.
767 /// This is meant as a helper in writing thread-safe `Foreach`
768 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
769 /// The user-defined processing callable is able to follow different
770 /// *streams of processing* indexed by the first parameter.
771 /// `ForeachSlot` works just as well with single-thread execution: in that
772 /// case `slot` will always be `0`.
773 ///
774 /// ### Example usage:
775 /// ~~~{.cpp}
776 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
777 /// ~~~
778 // clang-format on
779 template <typename F>
780 void ForeachSlot(F f, const ColumnNames_t &columns = {})
781 {
782 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
783 constexpr auto nColumns = ColTypes_t::list_size;
784
785 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
786
787 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
788
789 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
791
792 auto action =
793 std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, std::move(newColumns));
794 fLoopManager->Book(action.get());
795
796 fLoopManager->Run();
797 }
798
799 // clang-format off
800 ////////////////////////////////////////////////////////////////////////////
801 /// \brief Execute a user-defined reduce operation on the values of a column.
802 /// \tparam F The type of the reduce callable. Automatically deduced.
803 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
804 /// \param[in] f A callable with signature `T(T,T)`
805 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
806 /// \return the reduced quantity wrapped in a `RResultPtr`.
807 ///
808 /// A reduction takes two values of a column and merges them into one (e.g.
809 /// by summing them, taking the maximum, etc). This action performs the
810 /// specified reduction operation on all processed column values, returning
811 /// a single value of the same type. The callable f must satisfy the general
812 /// requirements of a *processing function* besides having signature `T(T,T)`
813 /// where `T` is the type of column columnName.
814 ///
815 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
816 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
817 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
818 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
819 /// overload.
820 ///
821 /// ### Example usage:
822 /// ~~~{.cpp}
823 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
824 /// ~~~
825 ///
826 /// This action is *lazy*: upon invocation of this method the calculation is
827 /// booked but not executed. See RResultPtr documentation.
828 // clang-format on
829 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
831 {
832 static_assert(
833 std::is_default_constructible<T>::value,
834 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
835 return Reduce(std::move(f), columnName, T());
836 }
837
838 ////////////////////////////////////////////////////////////////////////////
839 /// \brief Execute a user-defined reduce operation on the values of a column.
840 /// \tparam F The type of the reduce callable. Automatically deduced.
841 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
842 /// \param[in] f A callable with signature `T(T,T)`
843 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
844 /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
845 /// \return the reduced quantity wrapped in a `RResultPtr`.
846 ///
847 /// ### Example usage:
848 /// ~~~{.cpp}
849 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
850 /// ~~~
851 /// See the description of the first Reduce overload for more information.
852 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
853 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
854 {
855 return Aggregate(f, f, columnName, redIdentity);
856 }
857
858 ////////////////////////////////////////////////////////////////////////////
859 /// \brief Return the number of entries processed (*lazy action*)
860 /// \return the number of entries wrapped in a `RResultPtr`.
861 ///
862 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
863 /// This action is *lazy*: upon invocation of this method the calculation is
864 /// booked but not executed. See RResultPtr documentation.
865 ///
866 /// ### Example usage:
867 /// ~~~{.cpp}
868 /// auto nEntriesAfterCuts = myFilteredDf.Count();
869 /// ~~~
870 ///
872 {
873 const auto nSlots = fLoopManager->GetNSlots();
874 auto cSPtr = std::make_shared<ULong64_t>(0);
875 using Helper_t = RDFInternal::CountHelper;
877 auto action =
878 std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr, std::move(fCustomColumns));
879 fLoopManager->Book(action.get());
880 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
881 }
882
883 ////////////////////////////////////////////////////////////////////////////
884 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
885 /// \tparam T The type of the column.
886 /// \tparam COLL The type of collection used to store the values.
887 /// \param[in] column The name of the column to collect the values of.
888 /// \return the content of the selected column wrapped in a `RResultPtr`.
889 ///
890 /// The collection type to be specified for C-style array columns is `RVec<T>`:
891 /// in this case the returned collection is a `std::vector<RVec<T>>`.
892 /// ### Example usage:
893 /// ~~~{.cpp}
894 /// // In this case intCol is a std::vector<int>
895 /// auto intCol = rdf.Take<int>("integerColumn");
896 /// // Same content as above but in this case taken as a RVec<int>
897 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
898 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
899 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
900 /// ~~~
901 /// This action is *lazy*: upon invocation of this method the calculation is
902 /// booked but not executed. See RResultPtr documentation.
903 template <typename T, typename COLL = std::vector<T>>
905 {
906 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
907
908 const auto validColumnNames = GetValidatedColumnNames(1, columns);
909
910 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<1>(), TTraits::TypeList<T>());
911
912 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
914 auto valuesPtr = std::make_shared<COLL>();
915 const auto nSlots = fLoopManager->GetNSlots();
916
917 auto action =
918 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, std::move(newColumns));
919 fLoopManager->Book(action.get());
920 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
921 }
922
923 ////////////////////////////////////////////////////////////////////////////
924 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
925 /// \tparam V The type of the column used to fill the histogram.
926 /// \param[in] model The returned histogram will be constructed using this as a model.
927 /// \param[in] vName The name of the column that will fill the histogram.
928 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
929 ///
930 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
931 /// is filled with each one of the elements of the container. In case multiple columns of container type
932 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
933 /// possibly different lengths between events).
934 /// This action is *lazy*: upon invocation of this method the calculation is
935 /// booked but not executed. See RResultPtr documentation.
936 ///
937 /// ### Example usage:
938 /// ~~~{.cpp}
939 /// // Deduce column type (this invocation needs jitting internally)
940 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
941 /// // Explicit column type
942 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
943 /// ~~~
944 ///
945 template <typename V = RDFDetail::RInferredType>
946 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
947 {
948 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
949
950 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
951
952 std::shared_ptr<::TH1D> h(nullptr);
953 {
954 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
955 h = model.GetHistogram();
956 h->SetDirectory(nullptr);
957 }
958
959 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
960 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
961 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h);
962 }
963
964 ////////////////////////////////////////////////////////////////////////////
965 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
966 /// \tparam V The type of the column used to fill the histogram.
967 /// \param[in] vName The name of the column that will fill the histogram.
968 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
969 ///
970 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
971 /// The "name" and "title" strings are built starting from the input column name.
972 /// See the description of the first Histo1D overload for more details.
973 ///
974 /// ### Example usage:
975 /// ~~~{.cpp}
976 /// // Deduce column type (this invocation needs jitting internally)
977 /// auto myHist1 = myDf.Histo1D("myColumn");
978 /// // Explicit column type
979 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
980 /// ~~~
981 ///
982 template <typename V = RDFDetail::RInferredType>
984 {
985 const auto h_name = std::string(vName);
986 const auto h_title = h_name + ";" + h_name + ";count";
987 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
988 }
989
990 ////////////////////////////////////////////////////////////////////////////
991 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
992 /// \tparam V The type of the column used to fill the histogram.
993 /// \tparam W The type of the column used as weights.
994 /// \param[in] model The returned histogram will be constructed using this as a model.
995 /// \param[in] vName The name of the column that will fill the histogram.
996 /// \param[in] wName The name of the column that will provide the weights.
997 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
998 ///
999 /// See the description of the first Histo1D overload for more details.
1000 ///
1001 /// ### Example usage:
1002 /// ~~~{.cpp}
1003 /// // Deduce column type (this invocation needs jitting internally)
1004 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1005 /// // Explicit column type
1006 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1007 /// ~~~
1008 ///
1009 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1011 {
1012 const std::vector<std::string_view> columnViews = {vName, wName};
1013 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1014 ? ColumnNames_t()
1015 : ColumnNames_t(columnViews.begin(), columnViews.end());
1016 std::shared_ptr<::TH1D> h(nullptr);
1017 {
1018 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1019 h = model.GetHistogram();
1020 }
1021 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h);
1022 }
1023
1024 ////////////////////////////////////////////////////////////////////////////
1025 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1026 /// \tparam V The type of the column used to fill the histogram.
1027 /// \tparam W The type of the column used as weights.
1028 /// \param[in] vName The name of the column that will fill the histogram.
1029 /// \param[in] wName The name of the column that will provide the weights.
1030 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1031 ///
1032 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1033 /// The "name" and "title" strings are built starting from the input column names.
1034 /// See the description of the first Histo1D overload for more details.
1035 ///
1036 /// ### Example usage:
1037 /// ~~~{.cpp}
1038 /// // Deduce column types (this invocation needs jitting internally)
1039 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1040 /// // Explicit column types
1041 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1042 /// ~~~
1043 ///
1044 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1046 {
1047 // We build name and title based on the value and weight column names
1048 std::string str_vName{vName};
1049 std::string str_wName{wName};
1050 const auto h_name = str_vName + "_weighted_" + str_wName;
1051 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1052 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1053 }
1054
1055 ////////////////////////////////////////////////////////////////////////////
1056 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1057 /// \tparam V The type of the column used to fill the histogram.
1058 /// \tparam W The type of the column used as weights.
1059 /// \param[in] model The returned histogram will be constructed using this as a model.
1060 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1061 ///
1062 /// This overload will use the first two default columns as column names.
1063 /// See the description of the first Histo1D overload for more details.
1064 template <typename V, typename W>
1065 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1066 {
1067 return Histo1D<V, W>(model, "", "");
1068 }
1069
1070 ////////////////////////////////////////////////////////////////////////////
1071 /// \brief Fill and return a two-dimensional histogram (*lazy action*)
1072 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1073 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1074 /// \param[in] model The returned histogram will be constructed using this as a model.
1075 /// \param[in] v1Name The name of the column that will fill the x axis.
1076 /// \param[in] v2Name The name of the column that will fill the y axis.
1077 /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1078 ///
1079 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1080 /// is filled with each one of the elements of the container. In case multiple columns of container type
1081 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1082 /// possibly different lengths between events).
1083 /// This action is *lazy*: upon invocation of this method the calculation is
1084 /// booked but not executed. See RResultPtr documentation.
1085 ///
1086 /// ### Example usage:
1087 /// ~~~{.cpp}
1088 /// // Deduce column types (this invocation needs jitting internally)
1089 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1090 /// // Explicit column types
1091 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1092 /// ~~~
1093 ///
1094 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1096 {
1097 std::shared_ptr<::TH2D> h(nullptr);
1098 {
1099 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1100 h = model.GetHistogram();
1101 }
1102 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1103 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1104 }
1105 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1106 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1107 ? ColumnNames_t()
1108 : ColumnNames_t(columnViews.begin(), columnViews.end());
1109 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h);
1110 }
1111
1112 ////////////////////////////////////////////////////////////////////////////
1113 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1114 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1115 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1116 /// \tparam W The type of the column used for the weights of the histogram.
1117 /// \param[in] model The returned histogram will be constructed using this as a model.
1118 /// \param[in] v1Name The name of the column that will fill the x axis.
1119 /// \param[in] v2Name The name of the column that will fill the y axis.
1120 /// \param[in] wName The name of the column that will provide the weights.
1121 /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1122 ///
1123 /// This action is *lazy*: upon invocation of this method the calculation is
1124 /// booked but not executed. See RResultPtr documentation.
1125 /// The user gives up ownership of the model histogram.
1126 ///
1127 /// ### Example usage:
1128 /// ~~~{.cpp}
1129 /// // Deduce column types (this invocation needs jitting internally)
1130 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1131 /// // Explicit column types
1132 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1133 /// ~~~
1134 ///
1135 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1136 typename W = RDFDetail::RInferredType>
1139 {
1140 std::shared_ptr<::TH2D> h(nullptr);
1141 {
1142 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1143 h = model.GetHistogram();
1144 }
1145 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1146 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1147 }
1148 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1149 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1150 ? ColumnNames_t()
1151 : ColumnNames_t(columnViews.begin(), columnViews.end());
1152 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h);
1153 }
1154
1155 template <typename V1, typename V2, typename W>
1157 {
1158 return Histo2D<V1, V2, W>(model, "", "", "");
1159 }
1160
1161 ////////////////////////////////////////////////////////////////////////////
1162 /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1163 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1164 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1165 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1166 /// \param[in] model The returned histogram will be constructed using this as a model.
1167 /// \param[in] v1Name The name of the column that will fill the x axis.
1168 /// \param[in] v2Name The name of the column that will fill the y axis.
1169 /// \param[in] v3Name The name of the column that will fill the z axis.
1170 /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1171 ///
1172 /// This action is *lazy*: upon invocation of this method the calculation is
1173 /// booked but not executed. See RResultPtr documentation.
1174 ///
1175 /// ### Example usage:
1176 /// ~~~{.cpp}
1177 /// // Deduce column types (this invocation needs jitting internally)
1178 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1179 /// "myValueX", "myValueY", "myValueZ");
1180 /// // Explicit column types
1181 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1182 /// "myValueX", "myValueY", "myValueZ");
1183 /// ~~~
1184 ///
1185 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1186 typename V3 = RDFDetail::RInferredType>
1188 std::string_view v3Name = "")
1189 {
1190 std::shared_ptr<::TH3D> h(nullptr);
1191 {
1192 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1193 h = model.GetHistogram();
1194 }
1195 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1196 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1197 }
1198 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1199 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1200 ? ColumnNames_t()
1201 : ColumnNames_t(columnViews.begin(), columnViews.end());
1202 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h);
1203 }
1204
1205 ////////////////////////////////////////////////////////////////////////////
1206 /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1207 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1208 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1209 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1210 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1211 /// \param[in] model The returned histogram will be constructed using this as a model.
1212 /// \param[in] v1Name The name of the column that will fill the x axis.
1213 /// \param[in] v2Name The name of the column that will fill the y axis.
1214 /// \param[in] v3Name The name of the column that will fill the z axis.
1215 /// \param[in] wName The name of the column that will provide the weights.
1216 /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1217 ///
1218 /// This action is *lazy*: upon invocation of this method the calculation is
1219 /// booked but not executed. See RResultPtr documentation.
1220 ///
1221 /// ### Example usage:
1222 /// ~~~{.cpp}
1223 /// // Deduce column types (this invocation needs jitting internally)
1224 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1225 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1226 /// // Explicit column types
1227 /// using d_t = double;
1228 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1229 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1230 /// ~~~
1231 ///
1232 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1233 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1235 std::string_view v3Name, std::string_view wName)
1236 {
1237 std::shared_ptr<::TH3D> h(nullptr);
1238 {
1239 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1240 h = model.GetHistogram();
1241 }
1242 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1243 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1244 }
1245 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1246 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1247 ? ColumnNames_t()
1248 : ColumnNames_t(columnViews.begin(), columnViews.end());
1249 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h);
1250 }
1251
1252 template <typename V1, typename V2, typename V3, typename W>
1254 {
1255 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1256 }
1257
1258 ////////////////////////////////////////////////////////////////////////////
1259 /// \brief Fill and return a graph (*lazy action*)
1260 /// \tparam V1 The type of the column used to fill the x axis of the graph.
1261 /// \tparam V2 The type of the column used to fill the y axis of the graph.
1262 /// \param[in] v1Name The name of the column that will fill the x axis.
1263 /// \param[in] v2Name The name of the column that will fill the y axis.
1264 /// \return the graph wrapped in a `RResultPtr`.
1265 ///
1266 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1267 /// is filled with each one of the elements of the container.
1268 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1269 /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing.
1270 /// A name and a title to the graph is given based on the input column names.
1271 ///
1272 /// This action is *lazy*: upon invocation of this method the calculation is
1273 /// booked but not executed. See RResultPtr documentation.
1274 ///
1275 /// ### Example usage:
1276 /// ~~~{.cpp}
1277 /// // Deduce column types (this invocation needs jitting internally)
1278 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1279 /// // Explicit column types
1280 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1281 /// ~~~
1282 ///
1283 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1285 {
1286 auto graph = std::make_shared<::TGraph>();
1287 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1288 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1289 ? ColumnNames_t()
1290 : ColumnNames_t(columnViews.begin(), columnViews.end());
1291
1292 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1293
1294 // We build a default name and title based on the input columns
1295 if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
1296 const auto g_name = std::string(v1Name) + "_vs_" + std::string(v2Name);
1297 const auto g_title = std::string(v1Name) + " vs " + std::string(v2Name);
1298 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1299 graph->GetXaxis()->SetTitle(std::string(v1Name).c_str());
1300 graph->GetYaxis()->SetTitle(std::string(v2Name).c_str());
1301 }
1302
1303 return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph);
1304 }
1305
1306 ////////////////////////////////////////////////////////////////////////////
1307 /// \brief Fill and return a one-dimensional profile (*lazy action*)
1308 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1309 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1310 /// \param[in] model The model to be considered to build the new return value.
1311 /// \param[in] v1Name The name of the column that will fill the x axis.
1312 /// \param[in] v2Name The name of the column that will fill the y axis.
1313 /// \return the monodimensional profile wrapped in a `RResultPtr`.
1314 ///
1315 /// This action is *lazy*: upon invocation of this method the calculation is
1316 /// booked but not executed. See RResultPtr documentation.
1317 ///
1318 /// ### Example usage:
1319 /// ~~~{.cpp}
1320 /// // Deduce column types (this invocation needs jitting internally)
1321 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1322 /// // Explicit column types
1323 /// auto myProf2 = myDf.Graph<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1324 /// ~~~
1325 ///
1326 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1328 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1329 {
1330 std::shared_ptr<::TProfile> h(nullptr);
1331 {
1332 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1333 h = model.GetProfile();
1334 }
1335
1336 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1337 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1338 }
1339 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1340 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1341 ? ColumnNames_t()
1342 : ColumnNames_t(columnViews.begin(), columnViews.end());
1343 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h);
1344 }
1345
1346 ////////////////////////////////////////////////////////////////////////////
1347 /// \brief Fill and return a one-dimensional profile (*lazy action*)
1348 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1349 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1350 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1351 /// \param[in] model The model to be considered to build the new return value.
1352 /// \param[in] v1Name The name of the column that will fill the x axis.
1353 /// \param[in] v2Name The name of the column that will fill the y axis.
1354 /// \param[in] wName The name of the column that will provide the weights.
1355 /// \return the monodimensional profile wrapped in a `RResultPtr`.
1356 ///
1357 /// This action is *lazy*: upon invocation of this method the calculation is
1358 /// booked but not executed. See RResultPtr documentation.
1359 ///
1360 /// ### Example usage:
1361 /// ~~~{.cpp}
1362 /// // Deduce column types (this invocation needs jitting internally)
1363 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1364 /// // Explicit column types
1365 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1366 /// "xValues", "yValues", "weight");
1367 /// ~~~
1368 ///
1369 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1370 typename W = RDFDetail::RInferredType>
1373 {
1374 std::shared_ptr<::TProfile> h(nullptr);
1375 {
1376 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1377 h = model.GetProfile();
1378 }
1379
1380 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1381 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1382 }
1383 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1384 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1385 ? ColumnNames_t()
1386 : ColumnNames_t(columnViews.begin(), columnViews.end());
1387 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h);
1388 }
1389
1390 template <typename V1, typename V2, typename W>
1392 {
1393 return Profile1D<V1, V2, W>(model, "", "", "");
1394 }
1395
1396 ////////////////////////////////////////////////////////////////////////////
1397 /// \brief Fill and return a two-dimensional profile (*lazy action*)
1398 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1399 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1400 /// \tparam V2 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1401 /// \param[in] model The returned profile will be constructed using this as a model.
1402 /// \param[in] v1Name The name of the column that will fill the x axis.
1403 /// \param[in] v2Name The name of the column that will fill the y axis.
1404 /// \param[in] v3Name The name of the column that will fill the z axis.
1405 /// \return the bidimensional profile wrapped in a `RResultPtr`.
1406 ///
1407 /// This action is *lazy*: upon invocation of this method the calculation is
1408 /// booked but not executed. See RResultPtr documentation.
1409 ///
1410 /// ### Example usage:
1411 /// ~~~{.cpp}
1412 /// // Deduce column types (this invocation needs jitting internally)
1413 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1414 /// "xValues", "yValues", "zValues");
1415 /// // Explicit column types
1416 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1417 /// "xValues", "yValues", "zValues");
1418 /// ~~~
1419 ///
1420 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1421 typename V3 = RDFDetail::RInferredType>
1423 std::string_view v2Name = "", std::string_view v3Name = "")
1424 {
1425 std::shared_ptr<::TProfile2D> h(nullptr);
1426 {
1427 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1428 h = model.GetProfile();
1429 }
1430
1431 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1432 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1433 }
1434 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1435 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1436 ? ColumnNames_t()
1437 : ColumnNames_t(columnViews.begin(), columnViews.end());
1438 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h);
1439 }
1440
1441 ////////////////////////////////////////////////////////////////////////////
1442 /// \brief Fill and return a two-dimensional profile (*lazy action*)
1443 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1444 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1445 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1446 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1447 /// \param[in] model The returned histogram will be constructed using this as a model.
1448 /// \param[in] v1Name The name of the column that will fill the x axis.
1449 /// \param[in] v2Name The name of the column that will fill the y axis.
1450 /// \param[in] v3Name The name of the column that will fill the z axis.
1451 /// \param[in] wName The name of the column that will provide the weights.
1452 /// \return the bidimensional profile wrapped in a `RResultPtr`.
1453 ///
1454 /// This action is *lazy*: upon invocation of this method the calculation is
1455 /// booked but not executed. See RResultPtr documentation.
1456 ///
1457 /// ### Example usage:
1458 /// ~~~{.cpp}
1459 /// // Deduce column types (this invocation needs jitting internally)
1460 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1461 /// "xValues", "yValues", "zValues", "weight");
1462 /// // Explicit column types
1463 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1464 /// "xValues", "yValues", "zValues", "weight");
1465 /// ~~~
1466 ///
1467 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1468 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1470 std::string_view v3Name, std::string_view wName)
1471 {
1472 std::shared_ptr<::TProfile2D> h(nullptr);
1473 {
1474 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1475 h = model.GetProfile();
1476 }
1477
1478 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1479 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1480 }
1481 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1482 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1483 ? ColumnNames_t()
1484 : ColumnNames_t(columnViews.begin(), columnViews.end());
1485 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h);
1486 }
1487
1488 template <typename V1, typename V2, typename V3, typename W>
1490 {
1491 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1492 }
1493
1494 ////////////////////////////////////////////////////////////////////////////
1495 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1496 ///
1497 /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1498 /// as the column names pass as columnList. The arguments of `T::Fill` must have type equal to the one of the
1499 /// specified columns (these types are passed as template parameters to this method).
1500 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1501 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1502 /// \tparam T The type of the object to fill. Automatically deduced.
1503 /// \param[in] model The model to be considered to build the new return value.
1504 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1505 /// \return the filled object wrapped in a `RResultPtr`.
1506 ///
1507 /// The user gives up ownership of the model object.
1508 /// The list of column names to be used for filling must always be specified.
1509 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1510 /// See RResultPtr documentation.
1511 ///
1512 /// ### Example usage:
1513 /// ~~~{.cpp}
1514 /// MyClass obj;
1515 /// auto myFilledObj = myDf.Fill<float>(obj, {"col0", "col1"});
1516 /// ~~~
1517 ///
1518 template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1519 RResultPtr<T> Fill(T &&model, const ColumnNames_t &columnList)
1520 {
1521 auto h = std::make_shared<T>(std::forward<T>(model));
1522 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1523 throw std::runtime_error("The absence of axes limits is not supported yet.");
1524 }
1525 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h);
1526 }
1527
1528 ////////////////////////////////////////////////////////////////////////////
1529 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1530 ///
1531 /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1532 /// method with these types. See previous overload for more information.
1533 /// \tparam T The type of the object to fill. Automatically deduced.
1534 /// \param[in] model The model to be considered to build the new return value.
1535 /// \param[in] columnList The name of the columns read to fill the object.
1536 /// \return the filled object wrapped in a `RResultPtr`.
1537 ///
1538 /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1539 /// previous overload. Check the previous overload for more details on `Fill`.
1540 ///
1541 /// ### Example usage:
1542 /// ~~~{.cpp}
1543 /// MyClass obj;
1544 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
1545 /// ~~~
1546 ///
1547 template <typename T>
1548 RResultPtr<T> Fill(T &&model, const ColumnNames_t &bl)
1549 {
1550 auto h = std::make_shared<T>(std::forward<T>(model));
1551 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1552 throw std::runtime_error("The absence of axes limits is not supported yet.");
1553 }
1554 return CreateAction<RDFInternal::ActionTags::Fill, RDFDetail::RInferredType>(bl, h, bl.size());
1555 }
1556
1557 ////////////////////////////////////////////////////////////////////////////
1558 /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1559 ///
1560 /// \tparam V The type of the value column
1561 /// \param[in] value The name of the column with the values to fill the statistics with.
1562 /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1563 ///
1564 /// ### Example usage:
1565 /// ~~~{.cpp}
1566 /// // Deduce column type (this invocation needs jitting internally)
1567 /// auto stats0 = myDf.Stats("values");
1568 /// // Explicit column type
1569 /// auto stats1 = myDf.Stats<float>("values");
1570 /// ~~~
1571 ///
1572 template<typename V = RDFDetail::RInferredType>
1574 {
1575 ColumnNames_t columns;
1576 if (!value.empty()) {
1577 columns.emplace_back(std::string(value));
1578 }
1579 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1580 if (std::is_same<V, RDFDetail::RInferredType>::value) {
1581 return Fill(TStatistic(), validColumnNames);
1582 }
1583 else {
1584 return Fill<V>(TStatistic(), validColumnNames);
1585 }
1586 }
1587
1588 ////////////////////////////////////////////////////////////////////////////
1589 /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1590 ///
1591 /// \tparam V The type of the value column
1592 /// \tparam W The type of the weight column
1593 /// \param[in] value The name of the column with the values to fill the statistics with.
1594 /// \param[in] weight The name of the column with the weights to fill the statistics with.
1595 /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1596 ///
1597 /// ### Example usage:
1598 /// ~~~{.cpp}
1599 /// // Deduce column types (this invocation needs jitting internally)
1600 /// auto stats0 = myDf.Stats("values", "weights");
1601 /// // Explicit column types
1602 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
1603 /// ~~~
1604 ///
1605 template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1607 {
1608 ColumnNames_t columns {std::string(value), std::string(weight)};
1609 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
1610 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
1611 const auto validColumnNames = GetValidatedColumnNames(2, columns);
1612 // We have 3 cases:
1613 // 1. Both types are inferred: we use Fill and let the jit kick in.
1614 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
1615 // 3. Both types are explicit: we invoke the fully compiled Fill method.
1616 if (vIsInferred && wIsInferred) {
1617 return Fill(TStatistic(), validColumnNames);
1618 } else if (vIsInferred != wIsInferred) {
1619 std::string error("The ");
1620 error += vIsInferred ? "value " : "weight ";
1621 error += "column type is explicit, while the ";
1622 error += vIsInferred ? "weight " : "value ";
1623 error += " is specified to be inferred. This case is not supported: please specify both types or none.";
1624 throw std::runtime_error(error);
1625 } else {
1626 return Fill<V, W>(TStatistic(), validColumnNames);
1627 }
1628 }
1629
1630 ////////////////////////////////////////////////////////////////////////////
1631 /// \brief Return the minimum of processed column values (*lazy action*)
1632 /// \tparam T The type of the branch/column.
1633 /// \param[in] columnName The name of the branch/column to be treated.
1634 /// \return the minimum value of the selected column wrapped in a `RResultPtr`.
1635 ///
1636 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1637 /// template specialization of this method.
1638 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1639 ///
1640 /// This action is *lazy*: upon invocation of this method the calculation is
1641 /// booked but not executed. See RResultPtr documentation.
1642 ///
1643 /// ### Example usage:
1644 /// ~~~{.cpp}
1645 /// // Deduce column type (this invocation needs jitting internally)
1646 /// auto minVal0 = myDf.Min("values");
1647 /// // Explicit column type
1648 /// auto minVal1 = myDf.Min<double>("values");
1649 /// ~~~
1650 ///
1651 template <typename T = RDFDetail::RInferredType>
1653 {
1654 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1655 using RetType_t = RDFDetail::MinReturnType_t<T>;
1656 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1657 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV);
1658 }
1659
1660 ////////////////////////////////////////////////////////////////////////////
1661 /// \brief Return the maximum of processed column values (*lazy action*)
1662 /// \tparam T The type of the branch/column.
1663 /// \param[in] columnName The name of the branch/column to be treated.
1664 /// \return the maximum value of the selected column wrapped in a `RResultPtr`.
1665 ///
1666 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1667 /// template specialization of this method.
1668 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1669 ///
1670 /// This action is *lazy*: upon invocation of this method the calculation is
1671 /// booked but not executed. See RResultPtr documentation.
1672 ///
1673 /// ### Example usage:
1674 /// ~~~{.cpp}
1675 /// // Deduce column type (this invocation needs jitting internally)
1676 /// auto maxVal0 = myDf.Max("values");
1677 /// // Explicit column type
1678 /// auto maxVal1 = myDf.Max<double>("values");
1679 /// ~~~
1680 ///
1681 template <typename T = RDFDetail::RInferredType>
1683 {
1684 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1685 using RetType_t = RDFDetail::MaxReturnType_t<T>;
1686 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1687 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV);
1688 }
1689
1690 ////////////////////////////////////////////////////////////////////////////
1691 /// \brief Return the mean of processed column values (*lazy action*)
1692 /// \tparam T The type of the branch/column.
1693 /// \param[in] columnName The name of the branch/column to be treated.
1694 /// \return the mean value of the selected column wrapped in a `RResultPtr`.
1695 ///
1696 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1697 /// template specialization of this method.
1698 ///
1699 /// This action is *lazy*: upon invocation of this method the calculation is
1700 /// booked but not executed. See RResultPtr documentation.
1701 ///
1702 /// ### Example usage:
1703 /// ~~~{.cpp}
1704 /// // Deduce column type (this invocation needs jitting internally)
1705 /// auto meanVal0 = myDf.Mean("values");
1706 /// // Explicit column type
1707 /// auto meanVal1 = myDf.Mean<double>("values");
1708 /// ~~~
1709 ///
1710 template <typename T = RDFDetail::RInferredType>
1712 {
1713 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1714 auto meanV = std::make_shared<double>(0);
1715 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV);
1716 }
1717
1718 ////////////////////////////////////////////////////////////////////////////
1719 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*)
1720 /// \tparam T The type of the branch/column.
1721 /// \param[in] columnName The name of the branch/column to be treated.
1722 /// \return the standard deviation value of the selected column wrapped in a `RResultPtr`.
1723 ///
1724 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1725 /// template specialization of this method.
1726 ///
1727 /// This action is *lazy*: upon invocation of this method the calculation is
1728 /// booked but not executed. See RResultPtr documentation.
1729 ///
1730 /// ### Example usage:
1731 /// ~~~{.cpp}
1732 /// // Deduce column type (this invocation needs jitting internally)
1733 /// auto stdDev0 = myDf.StdDev("values");
1734 /// // Explicit column type
1735 /// auto stdDev1 = myDf.StdDev<double>("values");
1736 /// ~~~
1737 ///
1738 template <typename T = RDFDetail::RInferredType>
1740 {
1741 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1742 auto stdDeviationV = std::make_shared<double>(0);
1743 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV);
1744 }
1745
1746 // clang-format off
1747 ////////////////////////////////////////////////////////////////////////////
1748 /// \brief Return the sum of processed column values (*lazy action*)
1749 /// \tparam T The type of the branch/column.
1750 /// \param[in] columnName The name of the branch/column.
1751 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1752 /// \return the sum of the selected column wrapped in a `RResultPtr`.
1753 ///
1754 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1755 /// template specialization of this method.
1756 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1757 ///
1758 /// This action is *lazy*: upon invocation of this method the calculation is
1759 /// booked but not executed. See RResultPtr documentation.
1760 ///
1761 /// ### Example usage:
1762 /// ~~~{.cpp}
1763 /// // Deduce column type (this invocation needs jitting internally)
1764 /// auto sum0 = myDf.Sum("values");
1765 /// // Explicit column type
1766 /// auto sum1 = myDf.Sum<double>("values");
1767 /// ~~~
1768 ///
1769 template <typename T = RDFDetail::RInferredType>
1771 Sum(std::string_view columnName = "",
1772 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1773 {
1774 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1775 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1776 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV);
1777 }
1778 // clang-format on
1779
1780 ////////////////////////////////////////////////////////////////////////////
1781 /// \brief Gather filtering statistics
1782 /// \return the resulting `RCutFlowReport` instance wrapped in a `RResultPtr`.
1783 ///
1784 /// Calling `Report` on the main `RDataFrame` object gathers stats for
1785 /// all named filters in the call graph. Calling this method on a
1786 /// stored chain state (i.e. a graph node different from the first) gathers
1787 /// the stats for all named filters in the chain section between the original
1788 /// `RDataFrame` and that node (included). Stats are gathered in the same
1789 /// order as the named filters have been added to the graph.
1790 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1791 /// effects cuts had.
1792 ///
1793 /// This action is *lazy*: upon invocation of
1794 /// this method the calculation is booked but not executed. See RResultPtr
1795 /// documentation.
1796 ///
1797 /// ### Example usage:
1798 /// ~~~{.cpp}
1799 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
1800 /// auto cutReport = filtered3.Report();
1801 /// cutReport->Print();
1802 /// ~~~
1803 ///
1805 {
1806 bool returnEmptyReport = false;
1807 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1808 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1809 // certainly does not contain named filters.
1810 // The number 4 takes into account the implicit columns for entry and slot number
1811 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
1812 if (std::is_same<Proxied, RLoopManager>::value && fCustomColumns.GetNames().size() > 4)
1813 returnEmptyReport = true;
1814
1815 auto rep = std::make_shared<RCutFlowReport>();
1816 using Helper_t = RDFInternal::ReportHelper<Proxied>;
1818
1819 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
1821
1822 fLoopManager->Book(action.get());
1823 return MakeResultPtr(rep, *fLoopManager, std::move(action));
1824 }
1825
1826 /////////////////////////////////////////////////////////////////////////////
1827 /// \brief Returns the names of the available columns
1828 /// \return the container of column names.
1829 ///
1830 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1831 ///
1832 /// ### Example usage:
1833 /// ~~~{.cpp}
1834 /// auto colNames = d.GetColumnNames();
1835 /// // Print columns' names
1836 /// for (auto &&colName : colNames) std::cout << colName << std::endl;
1837 /// ~~~
1838 ///
1840 {
1841 ColumnNames_t allColumns;
1842
1843 auto addIfNotInternal = [&allColumns](std::string_view colName) {
1844 if (!RDFInternal::IsInternalColumn(colName))
1845 allColumns.emplace_back(colName);
1846 };
1847
1848 auto columnNames = fCustomColumns.GetNames();
1849
1850 std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
1851
1852 auto tree = fLoopManager->GetTree();
1853 if (tree) {
1854 auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
1855 allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1856 }
1857
1858 if (fDataSource) {
1859 auto &dsColNames = fDataSource->GetColumnNames();
1860 allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1861 }
1862
1863 return allColumns;
1864 }
1865
1866 /////////////////////////////////////////////////////////////////////////////
1867 /// \brief Return the type of a given column as a string.
1868 /// \return the type of the required column.
1869 ///
1870 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1871 ///
1872 /// ### Example usage:
1873 /// ~~~{.cpp}
1874 /// auto colType = d.GetColumnType("columnName");
1875 /// // Print column type
1876 /// std::cout << "Column " << colType << " has type " << colType << std::endl;
1877 /// ~~~
1878 ///
1880 {
1881 const auto &customCols = fCustomColumns.GetNames();
1882 const bool convertVector2RVec = true;
1883 const auto isCustom = std::find(customCols.begin(), customCols.end(), column) != customCols.end();
1884 if (!isCustom) {
1885 return RDFInternal::ColumnName2ColumnTypeName(std::string(column), fLoopManager->GetID(),
1887 convertVector2RVec);
1888 } else {
1889 // must convert the alias "__rdf::column_type" to a readable type
1890 const auto colID = std::to_string(fCustomColumns.GetColumns().at(std::string(column))->GetID());
1891 const auto call = "ROOT::Internal::RDF::TypeID2TypeName(typeid(__rdf" + std::to_string(fLoopManager->GetID()) +
1892 "::" + std::string(column) + colID + "_type))";
1893 fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
1894 const auto calcRes = RDFInternal::InterpreterCalc(call);
1895 return *reinterpret_cast<std::string *>(calcRes); // copy result to stack
1896 }
1897 }
1898
1899 /// \brief Returns the names of the filters created.
1900 /// \return the container of filters names.
1901 ///
1902 /// If called on a root node, all the filters in the computation graph will
1903 /// be printed. For any other node, only the filters upstream of that node.
1904 /// Filters without a name are printed as "Unnamed Filter"
1905 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1906 ///
1907 /// ### Example usage:
1908 /// ~~~{.cpp}
1909 /// auto filtNames = d.GetFilterNames();
1910 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
1911 /// ~~~
1912 ///
1913 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
1914
1915 /// \brief Returns the names of the defined columns
1916 /// \return the container of the defined column names.
1917 ///
1918 /// This is not an action nor a transformation, just a simple utility to
1919 /// get the columns names that have been defined up to the node.
1920 /// If no custom column has been defined, e.g. on a root node, it returns an
1921 /// empty collection.
1922 ///
1923 /// ### Example usage:
1924 /// ~~~{.cpp}
1925 /// auto defColNames = d.GetDefinedColumnNames();
1926 /// // Print defined columns' names
1927 /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
1928 /// ~~~
1929 ///
1931 {
1932 ColumnNames_t definedColumns;
1933
1934 auto columns = fCustomColumns.GetColumns();
1935
1936 for (auto column : columns) {
1937 if (!RDFInternal::IsInternalColumn(column.first) && !column.second->IsDataSourceColumn())
1938 definedColumns.emplace_back(column.first);
1939 }
1940
1941 return definedColumns;
1942 }
1943
1944 /// \brief Checks if a column is present in the dataset
1945 /// \return true if the column is available, false otherwise
1946 ///
1947 /// This method checks if a column is part of the input ROOT dataset, has
1948 /// been defined or can be provided by the data source.
1949 ///
1950 /// Example usage:
1951 /// ~~~{.cpp}
1952 /// ROOT::RDataFrame base(1);
1953 /// auto rdf = base.Define("definedColumn", [](){return 0;});
1954 /// rdf.HasColumn("definedColumn"); // true: we defined it
1955 /// rdf.HasColumn("rdfentry_"); // true: it's always there
1956 /// rdf.HasColumn("foo"); // false: it is not there
1957 /// ~~~
1959 {
1960 if (fCustomColumns.HasName(columnName))
1961 return true;
1962
1963 if (auto tree = fLoopManager->GetTree()) {
1964 const auto &branchNames = fLoopManager->GetBranchNames();
1965 const auto branchNamesEnd = branchNames.end();
1966 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
1967 return true;
1968 }
1969
1970 if (fDataSource && fDataSource->HasColumn(columnName))
1971 return true;
1972
1973 return false;
1974 }
1975
1976 /// \brief Gets the number of data processing slots
1977 /// \return The number of data processing slots used by this RDataFrame instance
1978 ///
1979 /// This method returns the number of data processing slots used by this RDataFrame
1980 /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
1981 ///
1982 /// Example usage:
1983 /// ~~~{.cpp}
1984 /// ROOT::EnableImplicitMT(6)
1985 /// ROOT::RDataFrame df(1);
1986 /// std::cout << df.GetNSlots() << std::endl; // prints "6"
1987 /// ~~~
1988 unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
1989
1990 // clang-format off
1991 ////////////////////////////////////////////////////////////////////////////
1992 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1993 /// \tparam F The type of the aggregator callable. Automatically deduced.
1994 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1995 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1996 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
1997 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1998 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1999 /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
2000 /// \return the result of the aggregation wrapped in a `RResultPtr`.
2001 ///
2002 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2003 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2004 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2005 /// the value of the column columnName.
2006 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2007 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2008 ///
2009 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2010 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2011 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2012 ///
2013 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2014 ///
2015 /// Example usage:
2016 /// ~~~{.cpp}
2017 /// auto aggregator = [](double acc, double x) { return acc * x; };
2018 /// ROOT::EnableImplicitMT();
2019 /// // If multithread is enabled, the aggregator function will be called by more threads
2020 /// // and will produce a vector of partial accumulators.
2021 /// // The merger function performs the final aggregation of these partial results.
2022 /// auto merger = [](std::vector<double> &accumulators) {
2023 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2024 /// accumulators[0] *= accumulators[i];
2025 /// }
2026 /// };
2027 ///
2028 /// // The accumulator is initialized at this value by every thread.
2029 /// double initValue = 1.;
2030 ///
2031 /// // Multiplies all elements of the column "x"
2032 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2033 /// ~~~
2034 // clang-format on
2035 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2036 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2037 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2038 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2039 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2040 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2041 {
2042 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2043 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2044 constexpr auto nColumns = ArgTypes::list_size;
2045
2046 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2047
2048 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ArgTypes());
2049
2050 auto accObjPtr = std::make_shared<U>(aggIdentity);
2051 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2052 using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
2053 auto action = std::make_unique<Action_t>(
2054 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2055 fProxiedPtr, std::move(newColumns));
2056 fLoopManager->Book(action.get());
2057 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2058 }
2059
2060 // clang-format off
2061 ////////////////////////////////////////////////////////////////////////////
2062 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
2063 /// \tparam F The type of the aggregator callable. Automatically deduced.
2064 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2065 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2066 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable
2067 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2068 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2069 /// \return the result of the aggregation wrapped in a `RResultPtr`.
2070 ///
2071 /// See previous Aggregate overload for more information.
2072 // clang-format on
2073 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2074 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2075 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2076 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2077 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2078 {
2079 static_assert(
2080 std::is_default_constructible<U>::value,
2081 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2082 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2083 }
2084
2085 // clang-format off
2086 ////////////////////////////////////////////////////////////////////////////
2087 /// \brief Book execution of a custom action using a user-defined helper object.
2088 /// \tparam ColumnTypes List of types of columns used by this action.
2089 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2090 /// \param[in] helper The Action Helper to be scheduled.
2091 /// \param[in] columns The names of the columns on which the helper acts.
2092 /// \return the result of the helper wrapped in a `RResultPtr`.
2093 ///
2094 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2095 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2096 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2097 ///
2098 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2099 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2100 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2101 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2102 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2103 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2104 /// the requested columns for the particular entry being processed.
2105 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2106 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2107 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2108 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
2109 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2110 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2111 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2112 /// method concurrently, but will always pass different 'slot' numbers.
2113 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2114 /// Result_t). The RResultPtr returned by Book will point to this object.
2115 ///
2116 /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2117 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2118 // clang-format on
2119 template <typename... ColumnTypes, typename Helper>
2120 RResultPtr<typename Helper::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
2121 {
2122 constexpr auto nColumns = sizeof...(ColumnTypes);
2123 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columns.size());
2124
2125 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2126
2127 // TODO add more static sanity checks on Helper
2128 using AH = RDFDetail::RActionImpl<Helper>;
2129 static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
2130 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2131
2132 using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>;
2133 auto resPtr = helper.GetResultPtr();
2134
2135 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2137
2138 auto action = std::make_unique<Action_t>(Helper(std::forward<Helper>(helper)), validColumnNames, fProxiedPtr,
2140 fLoopManager->Book(action.get());
2141 return MakeResultPtr(resPtr, *fLoopManager, std::move(action));
2142 }
2143
2144 ////////////////////////////////////////////////////////////////////////////
2145 /// \brief Provides a representation of the columns in the dataset
2146 /// \tparam ColumnTypes variadic list of branch/column types.
2147 /// \param[in] columnList Names of the columns to be displayed.
2148 /// \param[in] rows Number of events for each column to be displayed.
2149 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2150 ///
2151 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2152 /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2153 /// complete version through `AsString()`.
2154 ///
2155 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2156 ///
2157 /// Example usage:
2158 /// ~~~{.cpp}
2159 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2160 /// auto d1 = rdf.Display("");
2161 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2162 /// auto d2 = d.Display({"x", "y"}, 128);
2163 /// // Printing the short representations, the event loop will run
2164 /// d1->Print();
2165 /// d2->Print();
2166 /// ~~~
2167 template <typename... ColumnTypes>
2168 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2169 {
2170 CheckIMTDisabled("Display");
2171
2172 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2173 return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer);
2174 }
2175
2176 ////////////////////////////////////////////////////////////////////////////
2177 /// \brief Provides a representation of the columns in the dataset
2178 /// \param[in] columnList Names of the columns to be displayed.
2179 /// \param[in] rows Number of events for each column to be displayed.
2180 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2181 ///
2182 /// This overload automatically infers the column types.
2183 /// See the previous overloads for further details.
2184 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2185 {
2186 CheckIMTDisabled("Display");
2187 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2188 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer,
2189 columnList.size());
2190 }
2191
2192 ////////////////////////////////////////////////////////////////////////////
2193 /// \brief Provides a representation of the columns in the dataset
2194 /// \param[in] columnNameRegexp A regular expression to select the columns.
2195 /// \param[in] rows Number of events for each column to be displayed.
2196 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2197 ///
2198 /// The existing columns are matched against the regular expression. If the string provided
2199 /// is empty, all columns are selected.
2200 /// See the previous overloads for further details.
2201 RResultPtr<RDisplay> Display(std::string_view columnNameRegexp = "", const int &nRows = 5)
2202 {
2204 columnNameRegexp, "Display");
2205 return Display(selectedColumns, nRows);
2206 }
2207
2208 ////////////////////////////////////////////////////////////////////////////
2209 /// \brief Provides a representation of the columns in the dataset
2210 /// \param[in] columnList Names of the columns to be displayed.
2211 /// \param[in] nRows Number of events for each column to be displayed.
2212 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2213 ///
2214 /// See the previous overloads for further details.
2215 RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, const int &nRows = 5)
2216 {
2217 ColumnNames_t selectedColumns(columnList);
2218 return Display(selectedColumns, nRows);
2219 }
2220
2221private:
2223 {
2225
2226 // Entry number column
2227 const auto entryColName = "rdfentry_";
2228 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
2229 using NewColEntry_t =
2230 RDFDetail::RCustomColumn<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
2231
2232 auto entryColumn = std::make_shared<NewColEntry_t>(fLoopManager, entryColName, std::move(entryColGen),
2233 ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2234 newCols.AddName(entryColName);
2235 newCols.AddColumn(entryColumn, entryColName);
2236
2237 // Declare return type to the interpreter, for future use by jitted actions
2238 auto retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) + " { using " + entryColName +
2239 std::to_string(entryColumn->GetID()) + "_type = ULong64_t; }";
2240 fLoopManager->ToJitDeclare(retTypeDeclaration);
2241
2242 // Slot number column
2243 const auto slotColName = "rdfslot_";
2244 auto slotColGen = [](unsigned int slot) { return slot; };
2245 using NewColSlot_t = RDFDetail::RCustomColumn<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
2246
2247 auto slotColumn = std::make_shared<NewColSlot_t>(fLoopManager, slotColName, std::move(slotColGen),
2248 ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2249
2250 newCols.AddName(slotColName);
2251 newCols.AddColumn(slotColumn, slotColName);
2252
2253 fCustomColumns = std::move(newCols);
2254
2255 // Declare return type to the interpreter, for future use by jitted actions
2256 retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) + " { using " + slotColName +
2257 std::to_string(slotColumn->GetID()) + "_type = unsigned int; }";
2258 fLoopManager->ToJitDeclare(retTypeDeclaration);
2259
2260 fLoopManager->AddColumnAlias("tdfentry_", entryColName);
2261 fCustomColumns.AddName("tdfentry_");
2262 fLoopManager->AddColumnAlias("tdfslot_", slotColName);
2263 fCustomColumns.AddName("tdfslot_");
2264 }
2265
2266 std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
2267 {
2268 std::vector<std::string> types;
2269
2270 for (auto column : columnList) {
2271 types.push_back(GetColumnType(column));
2272 }
2273 return types;
2274 }
2275
2277 {
2279 std::string error(callerName);
2280 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
2281 throw std::runtime_error(error);
2282 }
2283 }
2284
2285 // Type was specified by the user, no need to infer it
2286 template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2287 typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2288 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
2289 {
2290 constexpr auto nColumns = sizeof...(BranchTypes);
2291
2292 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2293
2294 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2296
2297 const auto nSlots = fLoopManager->GetNSlots();
2298
2299 auto action = RDFInternal::BuildAction<BranchTypes...>(validColumnNames, r, nSlots, fProxiedPtr, ActionTag{},
2300 std::move(newColumns));
2301 fLoopManager->Book(action.get());
2302 return MakeResultPtr(r, *fLoopManager, std::move(action));
2303 }
2304
2305 // User did not specify type, do type inference
2306 // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
2307 // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
2308 template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2309 typename std::enable_if<RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2311 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
2312 {
2313 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
2314
2315 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
2316 const unsigned int nSlots = fLoopManager->GetNSlots();
2317
2318 auto tree = fLoopManager->GetTree();
2319 auto rOnHeap = RDFInternal::MakeSharedOnHeap(r);
2320
2321 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2322 using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
2323 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
2324
2325 auto jittedActionOnHeap =
2326 RDFInternal::MakeSharedOnHeap(std::make_shared<RDFInternal::RJittedAction>(*fLoopManager));
2327
2328 auto toJit = RDFInternal::JitBuildAction(
2329 validColumnNames, upcastNodeOnHeap, typeid(std::shared_ptr<ActionResultType>), typeid(ActionTag), rOnHeap,
2330 tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap, fLoopManager->GetID());
2331 fLoopManager->Book(jittedActionOnHeap->get());
2332 fLoopManager->ToJitExec(toJit);
2333 return MakeResultPtr(r, *fLoopManager, *jittedActionOnHeap);
2334 }
2335
2336 template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2337 typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
2338 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
2339 {
2343
2344 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2345 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2346 std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
2347 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2348 std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
2349
2350 constexpr auto nColumns = ColTypes_t::list_size;
2351
2352 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2353
2354 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
2355
2357 RDFInternal::RBookedCustomColumns newCols(newColumns);
2358 auto newColumn = std::make_shared<NewCol_t>(fLoopManager, name, std::forward<F>(expression), validColumnNames,
2359 fLoopManager->GetNSlots(), newCols);
2360
2361 // Declare return type to the interpreter, for future use by jitted actions
2362 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2363 if (retTypeName.empty()) {
2364 // The type is not known to the interpreter.
2365 // Forward-declare it as void + helpful comment, so that if this Define'd quantity is
2366 // ever used by jitted code users will have some way to know what went wrong
2367 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2368 retTypeName = "void /* The type of column \"" + std::string(name) + "\" (" + demangledType +
2369 ") is not known to the interpreter. */";
2370 }
2371 const auto retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) +
2372 " { " + +" using " + std::string(name) + std::to_string(newColumn->GetID()) +
2373 "_type = " + retTypeName + "; }";
2374 fLoopManager->ToJitDeclare(retTypeDeclaration);
2375
2376 newCols.AddName(name);
2377 newCols.AddColumn(newColumn, name);
2378
2379 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
2380
2381 return newInterface;
2382 }
2383
2384 // This overload is chosen when the callable passed to Define or DefineSlot returns void.
2385 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2386 // this way compilation of `Define` has no way to continue after throwing the error.
2387 template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2388 bool IsFStringConv = std::is_convertible<F, std::string>::value,
2389 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2390 typename std::enable_if<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>::type
2392 {
2393 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2394 "Error in `Define`: type returned by expression is not default-constructible");
2395 return *this; // never reached
2396 }
2397
2398 ////////////////////////////////////////////////////////////////////////////
2399 /// \brief Implementation of snapshot
2400 /// \param[in] treename The name of the TTree
2401 /// \param[in] filename The name of the TFile
2402 /// \param[in] columnList The list of names of the branches to be written
2403 /// The implementation exploits Foreach. The association of the addresses to
2404 /// the branches takes place at the first event. This is possible because
2405 /// since there are no copies, the address of the value passed by reference
2406 /// is the address pointing to the storage of the read/created object in/by
2407 /// the TTreeReaderValue/TemporaryBranch
2408 template <typename... ColumnTypes>
2410 const ColumnNames_t &columnList, const RSnapshotOptions &options)
2411 {
2412 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size());
2413
2414 const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
2415
2416 auto newColumns = CheckAndFillDSColumns(validCols, std::index_sequence_for<ColumnTypes...>(),
2418
2419 const std::string fullTreename(treename);
2420 // split name into directory and treename if needed
2421 const auto lastSlash = treename.rfind('/');
2422 std::string_view dirname = "";
2423 if (std::string_view::npos != lastSlash) {
2424 dirname = treename.substr(0, lastSlash);
2425 treename = treename.substr(lastSlash + 1, treename.size());
2426 }
2427
2428 // add action node to functional graph and run event loop
2429 std::unique_ptr<RDFInternal::RActionBase> actionPtr;
2431 // single-thread snapshot
2432 using Helper_t = RDFInternal::SnapshotHelper<ColumnTypes...>;
2434 actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
2435 fProxiedPtr, std::move(newColumns)));
2436 } else {
2437 // multi-thread snapshot
2438 using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>;
2440 actionPtr.reset(new Action_t(
2441 Helper_t(fLoopManager->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
2442 fProxiedPtr, std::move(newColumns)));
2443 }
2444
2445 fLoopManager->Book(actionPtr.get());
2446
2447 return RDFInternal::CreateSnapshotRDF(validCols, fullTreename, filename, options.fLazy, *fLoopManager,
2448 std::move(actionPtr));
2449 }
2450
2451 ////////////////////////////////////////////////////////////////////////////
2452 /// \brief Implementation of cache
2453 template <typename... BranchTypes, std::size_t... S>
2454 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...> s)
2455 {
2456 // Check at compile time that the columns types are copy constructible
2457 constexpr bool areCopyConstructible =
2458 RDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
2459 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2460
2461 // We share bits and pieces with snapshot. De facto this is a snapshot
2462 // in memory!
2463 RDFInternal::CheckTypesAndPars(sizeof...(BranchTypes), columnList.size());
2464
2465 auto colHolders = std::make_tuple(Take<BranchTypes>(columnList[S])...);
2466 auto ds = std::make_unique<RLazyDS<BranchTypes...>>(std::make_pair(columnList[S], std::get<S>(colHolders))...);
2467
2468 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnList));
2469
2470 (void)s; // Prevents unused warning
2471
2472 return cachedRDF;
2473 }
2474
2475protected:
2476 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
2478 : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fCustomColumns(columns)
2479 {
2480 }
2481
2483
2484 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2485
2486 /// Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,
2487 /// which is expensive in terms of runtime, is called at most once.
2488 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2489 {
2491 fDataSource);
2492 }
2493
2494 template <typename... ColumnTypes, std::size_t... S>
2497 {
2498 return fDataSource
2499 ? RDFInternal::AddDSColumns(*fLoopManager, validCols, fCustomColumns, *fDataSource,
2500 fLoopManager->GetNSlots(), std::index_sequence_for<ColumnTypes...>(),
2503 }
2504};
2505
2506} // end NS RDF
2507
2508} // namespace ROOT
2509
2510#endif // ROOT_RDF_INTERFACE
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
unsigned int UInt_t
Definition: RtypesCore.h:42
unsigned long long ULong64_t
Definition: RtypesCore.h:70
const Int_t kError
Definition: TError.h:39
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
typedef void((*Func_t)())
unsigned int GetID() const
Return the unique identifier of this RCustomColumnBase.
The head node of a RDF computation graph.
const std::map< std::string, std::string > & GetAliasMap() const
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitDeclare(const std::string &s)
void ToJitExec(const std::string &s)
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void JitDeclarations()
Declare to the interpreter type aliases and other entities required by RDF jitted nodes.
void AddColumnAlias(const std::string &alias, const std::string &colName)
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void Book(RDFInternal::RActionBase *actionPtr)
Helper class that provides the operation graph nodes.
An action node in a RDF computation graph.
Definition: RAction.hxx:217
Encapsulates the columns defined by the user.
void AddColumn(const std::shared_ptr< RDFDetail::RCustomColumnBase > &column, std::string_view name)
Internally it recreates the map with the new column, and swaps with the old one.
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
const RCustomColumnBasePtrMap_t & GetColumns() const
Returns the list of the pointers to the defined columns.
void AddName(std::string_view name)
Internally it recreates the map with the new column name, and swaps with the old one.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
virtual bool HasColumn(std::string_view) const =0
Checks if the dataset has a certain column.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:89
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RBookedCustomColumns &columns, RDataSource *ds)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:544
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action)
RLoopManager * GetLoopManager() const
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
Definition: RInterface.hxx:324
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>
Definition: RInterface.hxx:126
RResultPtr< T > Fill(T &&model, const ColumnNames_t &bl)
Return an object of type T on which T::Fill will be called once per event (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action)
Definition: RInterface.hxx:780
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:621
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
Definition: RInterface.hxx:295
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action)
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:105
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,...
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
RDFInternal::RBookedCustomColumns fCustomColumns
Contains the custom columns defined up to this node.
Definition: RInterface.hxx:108
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:228
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:570
std::enable_if<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view, F, const ColumnNames_t &)
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
Definition: RInterface.hxx:405
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:609
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
Definition: RInterface.hxx:670
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", const int &nRows=5)
Provides a representation of the columns in the dataset.
RLoopManager * fLoopManager
Definition: RInterface.hxx:103
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
Definition: RInterface.hxx:96
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action)
Definition: RInterface.hxx:871
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action)
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Creates a custom column.
Definition: RInterface.hxx:371
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:101
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:983
RDFInternal::RBookedCustomColumns CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence< S... >, TTraits::TypeList< ColumnTypes... >)
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
Implementation of snapshot.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
Definition: RInterface.hxx:730
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default)
Definition: RInterface.hxx:904
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
Definition: RInterface.hxx:684
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
RResultPtr< typename Helper::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:463
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:481
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:354
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:830
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
Definition: RInterface.hxx:750
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:248
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
std::enable_if< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:187
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:853
void CheckIMTDisabled(std::string_view callerName)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... > s)
Implementation of cache.
RDFDetail::ColumnNames_t ColumnNames_t
Definition: RInterface.hxx:91
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:212
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end)
Definition: RInterface.hxx:708
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:946
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:72
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:35
basic_string_view< char > string_view
#define F(x, y, z)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create a RResultPtr and set its pointer to the corresponding RAction This overload is invoked by non-...
Definition: RResultPtr.hxx:346
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branches names, including the ones of the friend trees.
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info An empty string is returned in case of failure...
Definition: RDFUtils.cxx:83
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
ColumnNames_t ConvertRegexToColumns(const RDFInternal::RBookedCustomColumns &customColumns, TTree *tree, ROOT::RDF::RDataSource *dataSource, std::string_view columnNameRegexp, std::string_view callerName)
std::string PrettyPrintAddr(const void *const addr)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
HeadNode_t CreateSnapshotRDF(const ColumnNames_t &validCols, std::string_view treeName, std::string_view fileName, bool isLazy, RLoopManager &loopManager, std::unique_ptr< RDFInternal::RActionBase > actionPtr)
std::string JitBuildAction(const ColumnNames_t &bl, void *prevNode, const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree, const unsigned int nSlots, const RDFInternal::RBookedCustomColumns &customCols, RDataSource *ds, std::shared_ptr< RJittedAction > *jittedActionOnHeap, unsigned int namespaceID)
bool IsInternalColumn(std::string_view colName)
Long64_t InterpreterCalc(const std::string &code, const std::string &context)
Definition: RDFUtils.cxx:310
std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree, RDataSource *ds, bool isCustomColumn, bool vector2rvec, unsigned int customColID)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:210
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
void BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const std::shared_ptr< RJittedCustomColumn > &jittedCustomColumn, const RDFInternal::RBookedCustomColumns &customCols, const ColumnNames_t &branches)
void BookFilterJit(RJittedFilter *jittedFilter, void *prevNodeOnHeap, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const RDFInternal::RBookedCustomColumns &customCols, TTree *tree, RDataSource *ds, unsigned int namespaceID)
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &dataSourceColumns)
double T(double x)
Definition: ChebyshevPol.h:34
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
VSD Structures.
Definition: StringConv.hxx:21
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:580
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:611
ROOT::Detail::RDF::ColumnNames_t ColumnNames_t
Definition: RDataFrame.cxx:788
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:597
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1194
RooArgSet S(const RooAbsArg &v1)
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
static constexpr double s
Definition: graph.py:1
Definition: tree.py:1
A collection of options to steer the creation of the dataset on file.
bool fLazy
Delay the snapshot of the dataset.
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:45
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:70
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a TProfile.
Definition: HistoModels.hxx:99
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27