Logo ROOT  
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
19#include "ROOT/RDF/RRange.hxx"
20#include "ROOT/RDF/Utils.hxx"
23#include "ROOT/RResultPtr.hxx"
25#include "ROOT/RStringView.hxx"
26#include "ROOT/TypeTraits.hxx"
27#include "RtypesCore.h" // for ULong64_t
28#include "TH1.h" // For Histo actions
29#include "TH2.h" // For Histo actions
30#include "TH3.h" // For Histo actions
31#include "TProfile.h"
32#include "TProfile2D.h"
33#include "TStatistic.h"
34
35#include <algorithm>
36#include <cstddef>
37#include <initializer_list>
38#include <limits>
39#include <memory>
40#include <sstream>
41#include <stdexcept>
42#include <string>
43#include <type_traits> // is_same, enable_if
44#include <typeinfo>
45#include <vector>
46
47class TGraph;
48
49// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
50namespace ROOT {
53void EnableImplicitMT(UInt_t numthreads);
54class RDataFrame;
55namespace Internal {
56namespace RDF {
58}
59}
60}
61namespace cling {
62std::string printValue(ROOT::RDataFrame *tdf);
63}
64
65namespace ROOT {
66namespace RDF {
69namespace TTraits = ROOT::TypeTraits;
70
71template <typename Proxied, typename DataSource>
72class RInterface;
73
74using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
75
76// clang-format off
77/**
78 * \class ROOT::RDF::RInterface
79 * \ingroup dataframe
80 * \brief The public interface to the RDataFrame federation of classes
81 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
82 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
83 *
84 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
85 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
86 */
87// clang-format on
88template <typename Proxied, typename DataSource = void>
90 using DS_t = DataSource;
91 using ColumnNames_t = RDFDetail::ColumnNames_t;
95 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
97
98 template <typename T, typename W>
99 friend class RInterface;
100
101 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
102 ///< The RLoopManager at the root of this computation graph. Never null.
104 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
106
107 /// Contains the custom columns defined up to this node.
109
110public:
111 ////////////////////////////////////////////////////////////////////////////
112 /// \brief Copy-assignment operator for RInterface.
113 RInterface &operator=(const RInterface &) = default; // member-wise copy; fProxiedPtr is a shared_ptr, so both objects then refer to the same node
114
115 ////////////////////////////////////////////////////////////////////////////
116 /// \brief Copy-ctor for RInterface. Defaulted (member-wise): the copy shares the proxied node with the original.
117 RInterface(const RInterface &) = default;
118
119 ////////////////////////////////////////////////////////////////////////////
120 /// \brief Move-ctor for RInterface. Defaulted (member-wise move of the data members).
121 RInterface(RInterface &&) = default;
122
123 ////////////////////////////////////////////////////////////////////////////
124 /// \brief Only enabled when building a RInterface<RLoopManager>
125 template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0>
126 RInterface(const std::shared_ptr<Proxied> &proxied)
127 : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource())
128 {
130 }
131
132 ////////////////////////////////////////////////////////////////////////////
133 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
134 /// Different RDataFrame methods return different C++ types. All nodes, however,
135 /// can be cast to this common type at the cost of a small performance penalty.
136 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
137 /// around via (non-template, C++11) helper functions.
138 /// Example usage:
139 /// ~~~{.cpp}
140 /// // a function that conditionally adds a Range to a RDataFrame node.
141 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
142 /// {
143 /// return mustAddRange ? df.Range(1) : df;
144 /// }
145 /// // use as :
146 /// ROOT::RDataFrame df(10);
147 /// auto maybeRanged = MaybeAddRange(df, true);
148 /// ~~~
149 /// Note that it is not a problem to pass RNode's by value.
150 operator RNode() const
151 {
152 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fCustomColumns,
154 }
155
156 ////////////////////////////////////////////////////////////////////////////
157 /// \brief Append a filter to the call graph.
158 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
159 /// signalling whether the event has passed the selection (true) or not (false).
160 /// \param[in] columns Names of the columns/branches in input to the filter function.
161 /// \param[in] name Optional name of this filter. See `Report`.
162 /// \return the filter node of the computation graph.
163 ///
164 /// Append a filter node at the point of the call graph corresponding to the
165 /// object this method is called on.
166 /// The callable `f` should not have side-effects (e.g. modification of an
167 /// external or static variable) to ensure correct results when implicit
168 /// multi-threading is active.
169 ///
170 /// RDataFrame only evaluates filters when necessary: if multiple filters
171 /// are chained one after another, they are executed in order and the first
172 /// one returning false causes the event to be discarded.
173 /// Even if multiple actions or transformations depend on the same filter,
174 /// it is executed once per entry. If its result is requested more than
175 /// once, the cached result is served.
176 ///
177 /// ### Example usage:
178 /// ~~~{.cpp}
179 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
180 /// auto filtered = df.Filter(myCut, {"x", "y"});
181 ///
182 /// // String: it must contain valid C++ except that column names can be used instead of variable names
183 /// auto filtered = df.Filter("x*y > 0");
184 /// ~~~
185 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
187 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
188 {
189 RDFInternal::CheckFilter(f);
190 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
191 constexpr auto nColumns = ColTypes_t::list_size;
192 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
193 const auto newColumns =
194 CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
195
197
198 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, newColumns, name);
199 fLoopManager->Book(filterPtr.get());
200 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, newColumns, fDataSource);
201 }
202
203 ////////////////////////////////////////////////////////////////////////////
204 /// \brief Append a filter to the call graph.
205 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
206 /// signalling whether the event has passed the selection (true) or not (false).
207 /// \param[in] name Optional name of this filter. See `Report`.
208 /// \return the filter node of the computation graph.
209 ///
210 /// Refer to the first overload of this method for the full documentation.
211 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
213 {
214 // The sfinae is there in order to pick up the overloaded method which accepts two strings
215 // rather than this template method.
216 return Filter(f, {}, name);
217 }
218
219 ////////////////////////////////////////////////////////////////////////////
220 /// \brief Append a filter to the call graph.
221 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
222 /// signalling whether the event has passed the selection (true) or not (false).
223 /// \param[in] columns Names of the columns/branches in input to the filter function.
224 /// \return the filter node of the computation graph.
225 ///
226 /// Refer to the first overload of this method for the full documentation.
227 template <typename F>
228 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
229 { // Convenience overload for brace-enclosed column lists: forwards to the overload taking `const ColumnNames_t &`
230 return Filter(std::move(f), ColumnNames_t{columns}); // `f` is already our own by-value copy: move it on instead of copying the callable again
231 }
232
233 ////////////////////////////////////////////////////////////////////////////
234 /// \brief Append a filter to the call graph.
235 /// \param[in] expression The filter expression in C++
236 /// \param[in] name Optional name of this filter. See `Report`.
237 /// \return the filter node of the computation graph.
238 ///
239 /// The expression is just-in-time compiled and used to filter entries. It must
240 /// be valid C++ syntax in which variable names are substituted with the names
241 /// of branches/columns.
242 ///
243 /// ### Example usage:
244 /// ~~~{.cpp}
245 /// auto filtered_df = df.Filter("myCollection.size() > 3");
246 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
247 /// ~~~
249 {
250 // deleted by the jitted call to JitFilterHelper
251 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
252 using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
253 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
254 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
255
256 RDFInternal::BookFilterJit(jittedFilter, upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
258
259 fLoopManager->Book(jittedFilter.get());
262 }
263
264 // clang-format off
265 ////////////////////////////////////////////////////////////////////////////
266 /// \brief Creates a custom column
267 /// \param[in] name The name of the custom column.
268 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
269 /// \param[in] columns Names of the columns/branches in input to the producer function.
270 /// \return the first node of the computation graph for which the new quantity is defined.
271 ///
272 /// Create a custom column that will be visible from all subsequent nodes
273 /// of the functional chain. The `expression` is only evaluated for entries that pass
274 /// all the preceding filters.
275 /// A new variable is created called `name`, accessible as if it was contained
276 /// in the dataset from subsequent transformations/actions.
277 ///
278 /// Use cases include:
279 /// * caching the results of complex calculations for easy and efficient multiple access
280 /// * extraction of quantities of interest from complex objects
281 ///
282 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
283 ///
284 /// ### Example usage:
285 /// ~~~{.cpp}
286 /// // assuming a function with signature:
287 /// double myComplexCalculation(const RVec<float> &muon_pts);
288 /// // we can pass it directly to Define
289 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
290 /// // alternatively, we can pass the body of the function as a string, as in Filter:
291 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
292 /// ~~~
293 template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
295 {
296 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns);
297 }
298 // clang-format on
299
300 // clang-format off
301 ////////////////////////////////////////////////////////////////////////////
302 /// \brief Creates a custom column with a value dependent on the processing slot.
303 /// \param[in] name The name of the custom column.
304 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
305 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
306 /// \return the first node of the computation graph for which the new quantity is defined.
307 ///
308 /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
309 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
310 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
311 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
312 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
313 ///
314 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
315 /// ~~~{.cpp}
316 /// int function(unsigned int, double, double);
317 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
318 /// df.DefineSlot("x", function, {"column1", "column2"})
319 /// ~~~
320 ///
321 /// See Define for more information.
322 template <typename F>
324 {
325 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns);
326 }
327 // clang-format on
328
329 // clang-format off
330 ////////////////////////////////////////////////////////////////////////////
331 /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
332 /// \param[in] name The name of the custom column.
333 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
334 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
335 /// \return the first node of the computation graph for which the new quantity is defined.
336 ///
337 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
338 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
339 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
340 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
341 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
342 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
343 ///
344 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
345 /// ~~~{.cpp}
346 /// int function(unsigned int, ULong64_t, double, double);
347 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
348 /// DefineSlotEntry("x", function, {"column1", "column2"})
349 /// ~~~
350 ///
351 /// See Define for more information.
352 template <typename F>
354 {
355 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns);
356 }
357 // clang-format on
358
359 ////////////////////////////////////////////////////////////////////////////
360 /// \brief Creates a custom column
361 /// \param[in] name The name of the custom column.
362 /// \param[in] expression An expression in C++ which represents the temporary value
363 /// \return the first node of the computation graph for which the new quantity is defined.
364 ///
365 /// The expression is just-in-time compiled and used to produce the column entries.
366 /// It must be valid C++ syntax in which variable names are substituted with the names
367 /// of branches/columns.
368 ///
369 /// Refer to the first overload of this method for the full documentation.
371 {
372 // this check must be done before jitting lest we throw exceptions in jitted code
376
377 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
378 auto jittedCustomColumn = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fCustomColumns,
379 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
380
382 newCols.AddName(name);
383 newCols.AddColumn(jittedCustomColumn, name);
384
385 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
386
387 return newInterface;
388 }
389
390 ////////////////////////////////////////////////////////////////////////////
391 /// \brief Allow referring to a column by a different name
392 /// \param[in] alias name of the column alias
393 /// \param[in] columnName name of the column to be aliased
394 /// \return the first node of the computation graph for which the alias is available.
395 ///
396 /// Aliasing an alias is supported.
397 ///
398 /// ### Example usage:
399 /// ~~~{.cpp}
400 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
401 /// ~~~
403 {
404 // The symmetry with Define is clear. We want to:
405 // - Create globally the alias and return this very node, unchanged
406 // - Make aliases accessible based on chains and not globally
407
408 // Helper to find out if a name is a column
409 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
410
411 // If the alias name is a column name, there is a problem
413 fLoopManager->GetAliasMap(), dsColumnNames);
414
415 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
416
417 fLoopManager->AddColumnAlias(std::string(alias), validColumnName);
418
420
421 newCols.AddName(alias);
422 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
423
424 return newInterface;
425 }
426
427 ////////////////////////////////////////////////////////////////////////////
428 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
429 /// \tparam ColumnTypes variadic list of branch/column types.
430 /// \param[in] treename The name of the output TTree.
431 /// \param[in] filename The name of the output TFile.
432 /// \param[in] columnList The list of names of the columns/branches to be written.
433 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
434 /// \return a `RDataFrame` that wraps the snapshotted dataset.
435 ///
436 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
437 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
438 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
439 /// written out and it appears before the array in the columnList.
440 ///
441 /// ### Example invocations:
442 ///
443 /// ~~~{.cpp}
444 /// // without specifying template parameters (column types automatically deduced)
445 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
446 ///
447 /// // specifying template parameters ("x" is `int`, "y" is `float`)
448 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
449 /// ~~~
450 ///
451 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
452 /// `RSnapshotOptions`:
453 /// ~~~{.cpp}
454 /// RSnapshotOptions opts;
455 /// opts.fLazy = true;
456 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
457 /// ~~~
458 template <typename... ColumnTypes>
460 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
461 const RSnapshotOptions &options = RSnapshotOptions())
462 {
463 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
464 }
465
466 ////////////////////////////////////////////////////////////////////////////
467 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
468 /// \param[in] treename The name of the output TTree.
469 /// \param[in] filename The name of the output TFile.
470 /// \param[in] columnList The list of names of the columns/branches to be written.
471 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
472 /// \return a `RDataFrame` that wraps the snapshotted dataset.
473 ///
474 /// This function returns a `RDataFrame` built with the output tree as a source.
475 /// The types of the columns are automatically inferred and do not need to be specified.
476 ///
477 /// See above for a more complete description and example usages.
479 const ColumnNames_t &columnList,
480 const RSnapshotOptions &options = RSnapshotOptions())
481 {
482 // Early return: if the list of columns is empty, just return an empty RDF
483 // If we proceed, the jitted call will not compile!
484 if (columnList.empty()) {
485 auto nEntries = *this->Count();
486 auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries));
487 return MakeResultPtr(snapshotRDF, *fLoopManager, nullptr);
488 }
489 std::stringstream snapCall;
490 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
491 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
493
494 // build a string equivalent to
495 // "resPtr = (RInterface<nodetype*>*)(this)->Snapshot<Ts...>(args...)"
497 snapCall << "*reinterpret_cast<ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>>*>("
499 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
500 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Snapshot<";
501
502
503 const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
504 const auto colTypes = GetValidatedArgTypes(validColumnNames, fCustomColumns, fLoopManager->GetTree(), fDataSource,
505 "Snapshot", /*vector2rvec=*/false);
506
507 for (auto &colType : colTypes)
508 snapCall << colType << ", ";
509 if (!colTypes.empty())
510 snapCall.seekp(-2, snapCall.cur); // remove the last ",
511 snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
512 << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
513 << RDFInternal::PrettyPrintAddr(&columnList) << "),"
514 << "*reinterpret_cast<ROOT::RDF::RSnapshotOptions*>(" << RDFInternal::PrettyPrintAddr(&options) << "));";
515 // jit snapCall, return result
516 RDFInternal::InterpreterCalc(snapCall.str(), "Snapshot");
517 return resPtr;
518 }
519
520 // clang-format off
521 ////////////////////////////////////////////////////////////////////////////
522 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
523 /// \param[in] treename The name of the output TTree.
524 /// \param[in] filename The name of the output TFile.
525 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A leading '^' and a trailing '$' are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
526 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
527 /// \return a `RDataFrame` that wraps the snapshotted dataset.
528 ///
529 /// This function returns a `RDataFrame` built with the output tree as a source.
530 /// The types of the columns are automatically inferred and do not need to be specified.
531 ///
532 /// See above for a more complete description and example usages.
534 std::string_view columnNameRegexp = "",
535 const RSnapshotOptions &options = RSnapshotOptions())
536 {
540 columnNameRegexp,
541 "Snapshot");
542 return Snapshot(treename, filename, selectedColumns, options);
543 }
544 // clang-format on
545
546 // clang-format off
547 ////////////////////////////////////////////////////////////////////////////
548 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
549 /// \param[in] treename The name of the output TTree.
550 /// \param[in] filename The name of the output TFile.
551 /// \param[in] columnList The list of names of the columns/branches to be written.
552 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
553 /// \return a `RDataFrame` that wraps the snapshotted dataset.
554 ///
555 /// This function returns a `RDataFrame` built with the output tree as a source.
556 /// The types of the columns are automatically inferred and do not need to be specified.
557 ///
558 /// See above for a more complete description and example usages.
560 std::initializer_list<std::string> columnList,
561 const RSnapshotOptions &options = RSnapshotOptions())
562 {
563 ColumnNames_t selectedColumns(columnList);
564 return Snapshot(treename, filename, selectedColumns, options);
565 }
566 // clang-format on
567
568 ////////////////////////////////////////////////////////////////////////////
569 /// \brief Save selected columns in memory
570 /// \tparam ColumnTypes variadic list of branch/column types.
571 /// \param[in] columnList Names of the columns to be cached in memory.
572 /// \return a `RDataFrame` that wraps the cached dataset.
573 ///
574 /// This action returns a new `RDataFrame` object, completely detached from
575 /// the originating `RDataFrame`. The new dataframe only contains the cached
576 /// columns and stores their content in memory for fast, zero-copy subsequent access.
577 ///
578 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
579 /// fits in memory and that will be accessed many times.
580 ///
581 /// ### Example usage:
582 ///
583 /// **Types and columns specified:**
584 /// ~~~{.cpp}
585 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
586 /// ~~~
587 ///
588 /// **Types inferred and columns specified (this invocation relies on jitting):**
589 /// ~~~{.cpp}
590 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
591 /// ~~~
592 ///
593 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
594 /// ~~~{.cpp}
595 /// auto cache_all_cols_df = df.Cache(myRegexp);
596 /// ~~~
597 template <typename... ColumnTypes>
599 {
600 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
601 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
602 }
603
604 ////////////////////////////////////////////////////////////////////////////
605 /// \brief Save selected columns in memory
606 /// \param[in] columnList Names of the columns to be cached in memory.
607 /// \return a `RDataFrame` that wraps the cached dataset.
608 ///
609 /// See the previous overloads for more information.
611 {
612 // Early return: if the list of columns is empty, just return an empty RDF
613 // If we proceed, the jitted call will not compile!
614 if (columnList.empty()) {
615 auto nEntries = *this->Count();
616 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
617 return emptyRDF;
618 }
619
620 std::stringstream cacheCall;
621 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
622 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
624 // build a string equivalent to
625 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
626 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
627 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
629 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
630 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
631
632 const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
633 const auto colTypes = GetValidatedArgTypes(validColumnNames, fCustomColumns, fLoopManager->GetTree(), fDataSource,
634 "Cache", /*vector2rvec=*/false);
635 for (const auto &colType : colTypes)
636 cacheCall << colType << ", ";
637 if (!columnList.empty())
638 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
639 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
640 << RDFInternal::PrettyPrintAddr(&columnList) << "));";
641 // jit cacheCall, return result
642 RDFInternal::InterpreterCalc(cacheCall.str(), "Cache");
643 return resRDF;
644 }
645
646 ////////////////////////////////////////////////////////////////////////////
647 /// \brief Save selected columns in memory
648 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A leading '^' and a trailing '$' are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
649 /// \return a `RDataFrame` that wraps the cached dataset.
650 ///
651 /// The existing columns are matched against the regular expression. If the string provided
652 /// is empty, all columns are selected. See the previous overloads for more information.
654 {
655
657 columnNameRegexp, "Cache");
658 return Cache(selectedColumns);
659 }
660
661 ////////////////////////////////////////////////////////////////////////////
662 /// \brief Save selected columns in memory
663 /// \param[in] columnList Names of the columns to be cached in memory.
664 /// \return a `RDataFrame` that wraps the cached dataset.
665 ///
666 /// See the previous overloads for more information.
667 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList) // convenience overload for brace-enclosed column lists
668 {
669 ColumnNames_t selectedColumns(columnList); // copy the braced list into a named ColumnNames_t
670 return Cache(selectedColumns); // delegate to the Cache overload accepting a ColumnNames_t (NOTE(review): its signature is not visible in this listing)
671 }
672
673 // clang-format off
674 ////////////////////////////////////////////////////////////////////////////
675 /// \brief Creates a node that filters entries based on range: [begin, end)
676 /// \param[in] begin Initial entry number considered for this range.
677 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
678 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
679 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
680 ///
681 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
682 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
683 ///
684 /// ### Example usage:
685 /// ~~~{.cpp}
686 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
687 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
688 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
689 /// ~~~
690 // clang-format on
// Validates the requested range, creates the corresponding range node on top of the proxied node
// and books it with the loop manager.
// Throws std::runtime_error if stride is zero or if a non-zero end is smaller than begin.
691 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
692 {
693 // check invariants: end == 0 is always accepted (documented as "until the end of the dataset")
// NOTE(review): a non-zero end equal to begin passes this check although the message below says
// that end must be greater than begin -- confirm whether an empty range is intended to be legal.
694 if (stride == 0 || (end != 0 && end < begin))
695 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
// The documentation of this method states that ranges are unavailable once EnableImplicitMT has
// been called; presumably this helper enforces that -- TODO confirm.
696 CheckIMTDisabled("Range");
697
// Build the range node from the arguments and the proxied node, and register it with the loop manager.
699 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
700 fLoopManager->Book(rangePtr.get());
702 return tdf_r;
703 }
704
705 // clang-format off
706 ////////////////////////////////////////////////////////////////////////////
707 /// \brief Creates a node that filters entries based on range
708 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
709 /// \return a node of the computation graph for which the range is defined.
710 ///
711 /// See the other Range overload for a detailed description.
712 // clang-format on
713 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
714
715 // clang-format off
716 ////////////////////////////////////////////////////////////////////////////
717 /// \brief Execute a user-defined function on each entry (*instant action*)
718 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
719 /// \param[in] columns Names of the columns/branches in input to the user function.
720 ///
721 /// The callable `f` is invoked once per entry. This is an *instant action*:
722 /// upon invocation, an event loop as well as execution of all scheduled actions
723 /// is triggered.
724 /// Users are responsible for the thread-safety of this callable when executing
725 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
726 ///
727 /// ### Example usage:
728 /// ~~~{.cpp}
729 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
730 /// ~~~
731 // clang-format on
732 template <typename F>
733 void Foreach(F f, const ColumnNames_t &columns = {})
734 {
735 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
736 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
737 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
738 }
739
740 // clang-format off
741 ////////////////////////////////////////////////////////////////////////////
742 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
743 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
744 /// \param[in] columns Names of the columns/branches in input to the user function.
745 ///
746 /// Same as `Foreach`, but the user-defined function takes an extra
747 /// `unsigned int` as its first parameter, the *processing slot index*.
748 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
749 /// for each thread of execution.
750 /// This is meant as a helper in writing thread-safe `Foreach`
751 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
752 /// The user-defined processing callable is able to follow different
753 /// *streams of processing* indexed by the first parameter.
754 /// `ForeachSlot` works just as well with single-thread execution: in that
755 /// case `slot` will always be `0`.
756 ///
757 /// ### Example usage:
758 /// ~~~{.cpp}
759 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
760 /// ~~~
761 // clang-format on
762 template <typename F>
763 void ForeachSlot(F f, const ColumnNames_t &columns = {})
764 {
// The first parameter of f is dropped from its argument list (the documentation of this method
// describes it as the processing slot index); what remains are the types of the columns to read.
765 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
766 constexpr auto nColumns = ColTypes_t::list_size;
767
// Obtain the final list of nColumns column names from the user-provided ones.
768 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
769
770 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
771
772 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
774
// The user callable is moved into the helper; the action is then registered with the loop manager.
775 auto action =
776 std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, std::move(newColumns));
777 fLoopManager->Book(action.get());
778
// This is an instant action: the loop manager is run right away, before returning to the caller.
779 fLoopManager->Run();
780 }
781
782 // clang-format off
783 ////////////////////////////////////////////////////////////////////////////
784 /// \brief Execute a user-defined reduce operation on the values of a column.
785 /// \tparam F The type of the reduce callable. Automatically deduced.
786 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
787 /// \param[in] f A callable with signature `T(T,T)`
788 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
789 /// \return the reduced quantity wrapped in a `RResultPtr`.
790 ///
791 /// A reduction takes two values of a column and merges them into one (e.g.
792 /// by summing them, taking the maximum, etc). This action performs the
793 /// specified reduction operation on all processed column values, returning
794 /// a single value of the same type. The callable f must satisfy the general
795 /// requirements of a *processing function* besides having signature `T(T,T)`
796 /// where `T` is the type of column columnName.
797 ///
798 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
799 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
800 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
801 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
802 /// overload.
803 ///
804 /// ### Example usage:
805 /// ~~~{.cpp}
806 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
807 /// ~~~
808 ///
809 /// This action is *lazy*: upon invocation of this method the calculation is
810 /// booked but not executed. See RResultPtr documentation.
811 // clang-format on
812 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
814 {
// This overload uses a default-constructed T as the initial value, so T must be default-constructible;
// otherwise compilation stops here with a message pointing to the overload taking redIdentity.
815 static_assert(
816 std::is_default_constructible<T>::value,
817 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
// Forward to the three-argument overload, passing T() as the initial value.
818 return Reduce(std::move(f), columnName, T());
819 }
820
821 ////////////////////////////////////////////////////////////////////////////
822 /// \brief Execute a user-defined reduce operation on the values of a column.
823 /// \tparam F The type of the reduce callable. Automatically deduced.
824 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
825 /// \param[in] f A callable with signature `T(T,T)`
826 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
827 /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
828 /// \return the reduced quantity wrapped in a `RResultPtr`.
829 ///
830 /// ### Example usage:
831 /// ~~~{.cpp}
832 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
833 /// ~~~
834 /// See the description of the first Reduce overload for more information.
835 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
836 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
837 {
838 return Aggregate(f, f, columnName, redIdentity);
839 }
840
841 ////////////////////////////////////////////////////////////////////////////
842 /// \brief Return the number of entries processed (*lazy action*)
843 /// \return the number of entries wrapped in a `RResultPtr`.
844 ///
845 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
846 /// This action is *lazy*: upon invocation of this method the calculation is
847 /// booked but not executed. See RResultPtr documentation.
848 ///
849 /// ### Example usage:
850 /// ~~~{.cpp}
851 /// auto nEntriesAfterCuts = myFilteredDf.Count();
852 /// ~~~
853 ///
855 {
856 const auto nSlots = fLoopManager->GetNSlots();
// Counter initialised to zero; it is shared between the helper and the returned result pointer.
857 auto cSPtr = std::make_shared<ULong64_t>(0);
858 using Helper_t = RDFInternal::CountHelper;
// The action is created with an empty list of column names.
860 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
// Register the action with the loop manager; nothing is run here (the action is documented as lazy).
862 fLoopManager->Book(action.get());
863 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
864 }
865
866 ////////////////////////////////////////////////////////////////////////////
867 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
868 /// \tparam T The type of the column.
869 /// \tparam COLL The type of collection used to store the values.
870 /// \param[in] column The name of the column to collect the values of.
871 /// \return the content of the selected column wrapped in a `RResultPtr`.
872 ///
873 /// The collection type to be specified for C-style array columns is `RVec<T>`:
874 /// in this case the returned collection is a `std::vector<RVec<T>>`.
875 /// ### Example usage:
876 /// ~~~{.cpp}
877 /// // In this case intCol is a std::vector<int>
878 /// auto intCol = rdf.Take<int>("integerColumn");
879 /// // Same content as above but in this case taken as a RVec<int>
880 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
881 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
882 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
883 /// ~~~
884 /// This action is *lazy*: upon invocation of this method the calculation is
885 /// booked but not executed. See RResultPtr documentation.
886 template <typename T, typename COLL = std::vector<T>>
888 {
// An empty column name results in an empty list of user columns; otherwise the list holds that single name.
889 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
890
// Exactly one column is requested.
891 const auto validColumnNames = GetValidatedColumnNames(1, columns);
892
893 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<1>(), TTraits::TypeList<T>());
894
895 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
// The output collection is shared between the helper and the returned result pointer.
897 auto valuesPtr = std::make_shared<COLL>();
898 const auto nSlots = fLoopManager->GetNSlots();
899
900 auto action =
901 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, std::move(newColumns));
// Register the action with the loop manager; nothing is run here (the action is documented as lazy).
902 fLoopManager->Book(action.get());
903 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
904 }
905
906 ////////////////////////////////////////////////////////////////////////////
907 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
908 /// \tparam V The type of the column used to fill the histogram.
909 /// \param[in] model The returned histogram will be constructed using this as a model.
910 /// \param[in] vName The name of the column that will fill the histogram.
911 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
912 ///
913 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
914 /// is filled with each one of the elements of the container. In case multiple columns of container type
915 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
916 /// possibly different lengths between events).
917 /// This action is *lazy*: upon invocation of this method the calculation is
918 /// booked but not executed. See RResultPtr documentation.
919 ///
920 /// ### Example usage:
921 /// ~~~{.cpp}
922 /// // Deduce column type (this invocation needs jitting internally)
923 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
924 /// // Explicit column type
925 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
926 /// ~~~
927 ///
928 template <typename V = RDFDetail::RInferredType>
929 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
930 {
931 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
932
933 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
934
935 std::shared_ptr<::TH1D> h(nullptr);
936 {
937 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
938 h = model.GetHistogram();
939 h->SetDirectory(nullptr);
940 }
941
942 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
943 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
944 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h);
945 }
946
947 ////////////////////////////////////////////////////////////////////////////
948 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
949 /// \tparam V The type of the column used to fill the histogram.
950 /// \param[in] vName The name of the column that will fill the histogram.
951 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
952 ///
953 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
954 /// The "name" and "title" strings are built starting from the input column name.
955 /// See the description of the first Histo1D overload for more details.
956 ///
957 /// ### Example usage:
958 /// ~~~{.cpp}
959 /// // Deduce column type (this invocation needs jitting internally)
960 /// auto myHist1 = myDf.Histo1D("myColumn");
961 /// // Explicit column type
962 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
963 /// ~~~
964 ///
965 template <typename V = RDFDetail::RInferredType>
967 {
// The histogram name is the column name; the title string is "<column>;<column>;count".
968 const auto h_name = std::string(vName);
969 const auto h_title = h_name + ";" + h_name + ";count";
// Forward to the model-based overload with a model built from these strings, 128 bins
// and identical lower and upper limits (0., 0.).
970 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
971 }
972
973 ////////////////////////////////////////////////////////////////////////////
974 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
975 /// \tparam V The type of the column used to fill the histogram.
976 /// \tparam W The type of the column used as weights.
977 /// \param[in] model The returned histogram will be constructed using this as a model.
978 /// \param[in] vName The name of the column that will fill the histogram.
979 /// \param[in] wName The name of the column that will provide the weights.
980 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
981 ///
982 /// See the description of the first Histo1D overload for more details.
983 ///
984 /// ### Example usage:
985 /// ~~~{.cpp}
986 /// // Deduce column type (this invocation needs jitting internally)
987 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
988 /// // Explicit column type
989 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
990 /// ~~~
991 ///
992 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
994 {
// If at least one of the two names is empty, an empty list of user columns is passed on;
// otherwise the list holds the value column followed by the weight column.
995 const std::vector<std::string_view> columnViews = {vName, wName};
996 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
997 ? ColumnNames_t()
998 : ColumnNames_t(columnViews.begin(), columnViews.end());
999 std::shared_ptr<::TH1D> h(nullptr);
1000 {
// The guard (constructed with kError) is alive only while the histogram is instantiated from the model.
1001 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1002 h = model.GetHistogram();
1003 }
1004 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h);
1005 }
1006
1007 ////////////////////////////////////////////////////////////////////////////
1008 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1009 /// \tparam V The type of the column used to fill the histogram.
1010 /// \tparam W The type of the column used as weights.
1011 /// \param[in] vName The name of the column that will fill the histogram.
1012 /// \param[in] wName The name of the column that will provide the weights.
1013 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1014 ///
1015 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1016 /// The "name" and "title" strings are built starting from the input column names.
1017 /// See the description of the first Histo1D overload for more details.
1018 ///
1019 /// ### Example usage:
1020 /// ~~~{.cpp}
1021 /// // Deduce column types (this invocation needs jitting internally)
1022 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1023 /// // Explicit column types
1024 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1025 /// ~~~
1026 ///
1027 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1029 {
1030 // We build name and title based on the value and weight column names
1031 std::string str_vName{vName};
1032 std::string str_wName{wName};
// Name: "<value>_weighted_<weight>". Title string: "<value>, weights: <weight>;<value>;count * <weight>".
1033 const auto h_name = str_vName + "_weighted_" + str_wName;
1034 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
// Forward to the model-based weighted overload with 128 bins and identical lower and upper limits (0., 0.).
1035 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1036 }
1037
1038 ////////////////////////////////////////////////////////////////////////////
1039 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1040 /// \tparam V The type of the column used to fill the histogram.
1041 /// \tparam W The type of the column used as weights.
1042 /// \param[in] model The returned histogram will be constructed using this as a model.
1043 /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1044 ///
1045 /// This overload will use the first two default columns as column names.
1046 /// See the description of the first Histo1D overload for more details.
1047 template <typename V, typename W>
1048 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1049 {
1050 return Histo1D<V, W>(model, "", "");
1051 }
1052
1053 ////////////////////////////////////////////////////////////////////////////
1054 /// \brief Fill and return a two-dimensional histogram (*lazy action*)
1055 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1056 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1057 /// \param[in] model The returned histogram will be constructed using this as a model.
1058 /// \param[in] v1Name The name of the column that will fill the x axis.
1059 /// \param[in] v2Name The name of the column that will fill the y axis.
1060 /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1061 ///
1062 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1063 /// is filled with each one of the elements of the container. In case multiple columns of container type
1064 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1065 /// possibly different lengths between events).
1066 /// This action is *lazy*: upon invocation of this method the calculation is
1067 /// booked but not executed. See RResultPtr documentation.
1068 ///
1069 /// ### Example usage:
1070 /// ~~~{.cpp}
1071 /// // Deduce column types (this invocation needs jitting internally)
1072 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1073 /// // Explicit column types
1074 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1075 /// ~~~
1076 ///
1077 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1079 {
1080 std::shared_ptr<::TH2D> h(nullptr);
1081 {
// The guard (constructed with kError) is alive only while the histogram is instantiated from the model.
1082 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1083 h = model.GetHistogram();
1084 }
// Models without axis limits are rejected with a std::runtime_error.
1085 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1086 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1087 }
// If at least one of the two names is empty, an empty list of user columns is passed on;
// otherwise the list holds the x column followed by the y column.
1088 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1089 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1090 ? ColumnNames_t()
1091 : ColumnNames_t(columnViews.begin(), columnViews.end());
1092 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h);
1093 }
1094
1095 ////////////////////////////////////////////////////////////////////////////
1096 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1097 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1098 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1099 /// \tparam W The type of the column used for the weights of the histogram.
1100 /// \param[in] model The returned histogram will be constructed using this as a model.
1101 /// \param[in] v1Name The name of the column that will fill the x axis.
1102 /// \param[in] v2Name The name of the column that will fill the y axis.
1103 /// \param[in] wName The name of the column that will provide the weights.
1104 /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1105 ///
1106 /// This action is *lazy*: upon invocation of this method the calculation is
1107 /// booked but not executed. See RResultPtr documentation.
1108 /// The user gives up ownership of the model histogram.
1109 ///
1110 /// ### Example usage:
1111 /// ~~~{.cpp}
1112 /// // Deduce column types (this invocation needs jitting internally)
1113 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1114 /// // Explicit column types
1115 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1116 /// ~~~
1117 ///
1118 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1119 typename W = RDFDetail::RInferredType>
1122 {
1123 std::shared_ptr<::TH2D> h(nullptr);
1124 {
// The guard (constructed with kError) is alive only while the histogram is instantiated from the model.
1125 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1126 h = model.GetHistogram();
1127 }
// Models without axis limits are rejected with a std::runtime_error.
1128 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1129 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1130 }
// If at least one of the three names is empty, an empty list of user columns is passed on;
// otherwise the list holds the x, y and weight columns, in this order.
1131 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1132 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1133 ? ColumnNames_t()
1134 : ColumnNames_t(columnViews.begin(), columnViews.end());
1135 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h);
1136 }
1137
// Weighted two-dimensional histogram overload that takes only a model: the x, y and weight column
// names are all passed as empty strings to the overload taking a model and three column names.
// NOTE(review): presumably this selects the default columns, as documented for the analogous
// Histo1D<V, W>(model) overload -- confirm.
1138 template <typename V1, typename V2, typename W>
1140 {
1141 return Histo2D<V1, V2, W>(model, "", "", "");
1142 }
1143
1144 ////////////////////////////////////////////////////////////////////////////
1145 /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1146 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1147 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1148 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1149 /// \param[in] model The returned histogram will be constructed using this as a model.
1150 /// \param[in] v1Name The name of the column that will fill the x axis.
1151 /// \param[in] v2Name The name of the column that will fill the y axis.
1152 /// \param[in] v3Name The name of the column that will fill the z axis.
1153 /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1154 ///
1155 /// This action is *lazy*: upon invocation of this method the calculation is
1156 /// booked but not executed. See RResultPtr documentation.
1157 ///
1158 /// ### Example usage:
1159 /// ~~~{.cpp}
1160 /// // Deduce column types (this invocation needs jitting internally)
1161 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1162 /// "myValueX", "myValueY", "myValueZ");
1163 /// // Explicit column types
1164 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1165 /// "myValueX", "myValueY", "myValueZ");
1166 /// ~~~
1167 ///
1168 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1169 typename V3 = RDFDetail::RInferredType>
1171 std::string_view v3Name = "")
1172 {
1173 std::shared_ptr<::TH3D> h(nullptr);
1174 {
// The guard (constructed with kError) is alive only while the histogram is instantiated from the model.
1175 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1176 h = model.GetHistogram();
1177 }
// Models without axis limits are rejected with a std::runtime_error.
1178 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1179 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1180 }
// If at least one of the three names is empty, an empty list of user columns is passed on;
// otherwise the list holds the x, y and z columns, in this order.
1181 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1182 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1183 ? ColumnNames_t()
1184 : ColumnNames_t(columnViews.begin(), columnViews.end());
1185 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h);
1186 }
1187
1188 ////////////////////////////////////////////////////////////////////////////
1189 /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1190 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1191 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1192 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1193 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1194 /// \param[in] model The returned histogram will be constructed using this as a model.
1195 /// \param[in] v1Name The name of the column that will fill the x axis.
1196 /// \param[in] v2Name The name of the column that will fill the y axis.
1197 /// \param[in] v3Name The name of the column that will fill the z axis.
1198 /// \param[in] wName The name of the column that will provide the weights.
1199 /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1200 ///
1201 /// This action is *lazy*: upon invocation of this method the calculation is
1202 /// booked but not executed. See RResultPtr documentation.
1203 ///
1204 /// ### Example usage:
1205 /// ~~~{.cpp}
1206 /// // Deduce column types (this invocation needs jitting internally)
1207 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1208 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1209 /// // Explicit column types
1210 /// using d_t = double;
1211 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1212 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1213 /// ~~~
1214 ///
1215 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1216 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1218 std::string_view v3Name, std::string_view wName)
1219 {
1220 std::shared_ptr<::TH3D> h(nullptr);
1221 {
// The guard (constructed with kError) is alive only while the histogram is instantiated from the model.
1222 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1223 h = model.GetHistogram();
1224 }
// Models without axis limits are rejected with a std::runtime_error.
1225 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1226 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1227 }
// If at least one of the four names is empty, an empty list of user columns is passed on;
// otherwise the list holds the x, y, z and weight columns, in this order.
1228 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1229 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1230 ? ColumnNames_t()
1231 : ColumnNames_t(columnViews.begin(), columnViews.end());
1232 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h);
1233 }
1234
// Weighted three-dimensional histogram overload that takes only a model: the x, y, z and weight
// column names are all passed as empty strings to the overload taking a model and four column names.
// NOTE(review): presumably this selects the default columns, as documented for the analogous
// Histo1D<V, W>(model) overload -- confirm.
1235 template <typename V1, typename V2, typename V3, typename W>
1237 {
1238 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1239 }
1240
1241 ////////////////////////////////////////////////////////////////////////////
1242 /// \brief Fill and return a graph (*lazy action*)
1243 /// \tparam V1 The type of the column used to fill the x axis of the graph.
1244 /// \tparam V2 The type of the column used to fill the y axis of the graph.
1245 /// \param[in] v1Name The name of the column that will fill the x axis.
1246 /// \param[in] v2Name The name of the column that will fill the y axis.
1247 /// \return the graph wrapped in a `RResultPtr`.
1248 ///
1249 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1250 /// is filled with each one of the elements of the container.
1251 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1252 /// If the graph has to be drawn, it is suggested that the user sorts it along the x axis before drawing it.
1253 /// A name and a title are given to the graph based on the input column names.
1254 ///
1255 /// This action is *lazy*: upon invocation of this method the calculation is
1256 /// booked but not executed. See RResultPtr documentation.
1257 ///
1258 /// ### Example usage:
1259 /// ~~~{.cpp}
1260 /// // Deduce column types (this invocation needs jitting internally)
1261 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1262 /// // Explicit column types
1263 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1264 /// ~~~
1265 ///
1266 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1268 {
1269 auto graph = std::make_shared<::TGraph>();
// If at least one of the two names is empty, an empty list of user columns is used;
// otherwise the list holds the x column followed by the y column.
1270 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1271 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1272 ? ColumnNames_t()
1273 : ColumnNames_t(columnViews.begin(), columnViews.end());
1274
// Exactly two columns are requested.
1275 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1276
1277 // We build a default name and title based on the input columns
// NOTE(review): the condition inspects validatedColumns, but the name, title and axis labels are
// built from v1Name/v2Name. If those are empty while the validated names are not, the graph is
// named "_vs_" with empty axis titles -- confirm whether validatedColumns should be used instead.
1278 if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
1279 const auto g_name = std::string(v1Name) + "_vs_" + std::string(v2Name);
1280 const auto g_title = std::string(v1Name) + " vs " + std::string(v2Name);
1281 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1282 graph->GetXaxis()->SetTitle(std::string(v1Name).c_str());
1283 graph->GetYaxis()->SetTitle(std::string(v2Name).c_str());
1284 }
1285
1286 return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph);
1287 }
1288
1289 ////////////////////////////////////////////////////////////////////////////
1290 /// \brief Fill and return a one-dimensional profile (*lazy action*)
1291 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1292 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1293 /// \param[in] model The model to be considered to build the new return value.
1294 /// \param[in] v1Name The name of the column that will fill the x axis.
1295 /// \param[in] v2Name The name of the column that will fill the y axis.
1296 /// \return the monodimensional profile wrapped in a `RResultPtr`.
1297 ///
1298 /// This action is *lazy*: upon invocation of this method the calculation is
1299 /// booked but not executed. See RResultPtr documentation.
1300 ///
1301 /// ### Example usage:
1302 /// ~~~{.cpp}
1303 /// // Deduce column types (this invocation needs jitting internally)
1304 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1305 /// // Explicit column types
1306 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1307 /// ~~~
1308 ///
1309 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1311 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1312 {
1313 std::shared_ptr<::TProfile> h(nullptr);
// The profile is created inside its own scope so that the `iel` guard (built with kError) only lives for the
// duration of GetProfile(). NOTE(review): presumably this silences messages up to kError - confirm.
1314 {
1315 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1316 h = model.GetProfile();
1317 }
1318
// Models for which HasAxisLimits() is false are rejected up front.
1319 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1320 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1321 }
// If at least one of the two names is empty, no user column is forwarded at all (empty list);
// otherwise both names are forwarded in x, y order.
1322 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1323 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1324 ? ColumnNames_t()
1325 : ColumnNames_t(columnViews.begin(), columnViews.end());
1326 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h);
1327 }
1328
1329 ////////////////////////////////////////////////////////////////////////////
1330 /// \brief Fill and return a one-dimensional profile (*lazy action*)
1331 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1332 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1333 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1334 /// \param[in] model The model to be considered to build the new return value.
1335 /// \param[in] v1Name The name of the column that will fill the x axis.
1336 /// \param[in] v2Name The name of the column that will fill the y axis.
1337 /// \param[in] wName The name of the column that will provide the weights.
1338 /// \return the monodimensional profile wrapped in a `RResultPtr`.
1339 ///
1340 /// This action is *lazy*: upon invocation of this method the calculation is
1341 /// booked but not executed. See RResultPtr documentation.
1342 ///
1343 /// ### Example usage:
1344 /// ~~~{.cpp}
1345 /// // Deduce column types (this invocation needs jitting internally)
1346 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1347 /// // Explicit column types
1348 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1349 /// "xValues", "yValues", "weight");
1350 /// ~~~
1351 ///
1352 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1353 typename W = RDFDetail::RInferredType>
1356 {
1357 std::shared_ptr<::TProfile> h(nullptr);
// The `iel` guard (built with kError) is scoped to the GetProfile() call only.
// NOTE(review): presumably it silences messages up to kError while the profile is created - confirm.
1358 {
1359 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1360 h = model.GetProfile();
1361 }
1362
// Models for which HasAxisLimits() is false are rejected up front.
1363 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1364 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1365 }
// If at least one of the three names is empty, an empty column list is forwarded;
// otherwise the names are forwarded in x, y, weight order.
1366 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1367 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1368 ? ColumnNames_t()
1369 : ColumnNames_t(columnViews.begin(), columnViews.end());
1370 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h);
1371 }
1372
1373 template <typename V1, typename V2, typename W>
1375 {
// Convenience overload: all three column types are explicit and every column name is left empty,
// so the weighted overload ends up forwarding an empty column list.
1376 return Profile1D<V1, V2, W>(model, "", "", "");
1377 }
1378
1379 ////////////////////////////////////////////////////////////////////////////
1380 /// \brief Fill and return a two-dimensional profile (*lazy action*)
1381 /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present.
1382 /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present.
1383 /// \tparam V3 The type of the column used to fill the z axis of the profile. Inferred if not present.
1384 /// \param[in] model The returned profile will be constructed using this as a model.
1385 /// \param[in] v1Name The name of the column that will fill the x axis.
1386 /// \param[in] v2Name The name of the column that will fill the y axis.
1387 /// \param[in] v3Name The name of the column that will fill the z axis.
1388 /// \return the bidimensional profile wrapped in a `RResultPtr`.
1389 ///
1390 /// This action is *lazy*: upon invocation of this method the calculation is
1391 /// booked but not executed. See RResultPtr documentation.
1392 ///
1393 /// ### Example usage:
1394 /// ~~~{.cpp}
1395 /// // Deduce column types (this invocation needs jitting internally)
1396 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1397 /// "xValues", "yValues", "zValues");
1398 /// // Explicit column types
1399 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1400 /// "xValues", "yValues", "zValues");
1401 /// ~~~
1402 ///
1403 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1404 typename V3 = RDFDetail::RInferredType>
1406 std::string_view v2Name = "", std::string_view v3Name = "")
1407 {
1408 std::shared_ptr<::TProfile2D> h(nullptr);
// The `iel` guard (built with kError) is scoped to the GetProfile() call only.
// NOTE(review): presumably it silences messages up to kError while the profile is created - confirm.
1409 {
1410 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1411 h = model.GetProfile();
1412 }
1413
// Models for which HasAxisLimits() is false are rejected up front.
1414 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1415 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1416 }
// If at least one of the three names is empty, an empty column list is forwarded;
// otherwise the names are forwarded in x, y, z order.
1417 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1418 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1419 ? ColumnNames_t()
1420 : ColumnNames_t(columnViews.begin(), columnViews.end());
1421 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h);
1422 }
1423
1424 ////////////////////////////////////////////////////////////////////////////
1425 /// \brief Fill and return a two-dimensional profile (*lazy action*)
1426 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1427 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1428 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1429 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1430 /// \param[in] model The returned histogram will be constructed using this as a model.
1431 /// \param[in] v1Name The name of the column that will fill the x axis.
1432 /// \param[in] v2Name The name of the column that will fill the y axis.
1433 /// \param[in] v3Name The name of the column that will fill the z axis.
1434 /// \param[in] wName The name of the column that will provide the weights.
1435 /// \return the bidimensional profile wrapped in a `RResultPtr`.
1436 ///
1437 /// This action is *lazy*: upon invocation of this method the calculation is
1438 /// booked but not executed. See RResultPtr documentation.
1439 ///
1440 /// ### Example usage:
1441 /// ~~~{.cpp}
1442 /// // Deduce column types (this invocation needs jitting internally)
1443 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1444 /// "xValues", "yValues", "zValues", "weight");
1445 /// // Explicit column types
1446 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1447 /// "xValues", "yValues", "zValues", "weight");
1448 /// ~~~
1449 ///
1450 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1451 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1453 std::string_view v3Name, std::string_view wName)
1454 {
1455 std::shared_ptr<::TProfile2D> h(nullptr);
// The `iel` guard (built with kError) is scoped to the GetProfile() call only.
// NOTE(review): presumably it silences messages up to kError while the profile is created - confirm.
1456 {
1457 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1458 h = model.GetProfile();
1459 }
1460
// Models for which HasAxisLimits() is false are rejected up front.
1461 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1462 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1463 }
// If at least one of the four names is empty, an empty column list is forwarded;
// otherwise the names are forwarded in x, y, z, weight order.
1464 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1465 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1466 ? ColumnNames_t()
1467 : ColumnNames_t(columnViews.begin(), columnViews.end());
1468 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h);
1469 }
1470
1471 template <typename V1, typename V2, typename V3, typename W>
1473 {
// Convenience overload: all four column types are explicit and every column name is left empty,
// so the weighted overload ends up forwarding an empty column list.
1474 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1475 }
1476
1477 ////////////////////////////////////////////////////////////////////////////
1478 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1479 ///
1480 /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1481 /// as the column names passed as columnList. The arguments of `T::Fill` must have type equal to the one of the
1482 /// specified columns (these types are passed as template parameters to this method).
1483 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1484 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1485 /// \tparam T The type of the object to fill. Automatically deduced.
1486 /// \param[in] model The model to be considered to build the new return value.
1487 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1488 /// \return the filled object wrapped in a `RResultPtr`.
1489 ///
1490 /// The user gives up ownership of the model object.
1491 /// The list of column names to be used for filling must always be specified.
1492 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1493 /// See RResultPtr documentation.
1494 ///
1495 /// ### Example usage:
1496 /// ~~~{.cpp}
1497 /// MyClass obj;
1498 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
1499 /// ~~~
1500 ///
1501 template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1502 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
1503 {
// `model` is a forwarding reference: when the caller passes a named object (as in the documented example)
// T is deduced as an lvalue reference. std::decay_t strips the reference/cv-qualifiers so that the stored
// object, the HistoUtils lookup and the returned RResultPtr all use the plain object type.
// For rvalue arguments std::decay_t<T> is T itself, so existing callers are unaffected.
// The model is copied (lvalue) or moved (rvalue) into a shared object that the booked action fills.
1504 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
// Models for which HasAxisLimits() is false are rejected up front.
1505 if (!RDFInternal::HistoUtils<std::decay_t<T>>::HasAxisLimits(*h)) {
1506 throw std::runtime_error("The absence of axes limits is not supported yet.");
1507 }
// Column types are fully specified at compile time through FirstColumn/OtherColumns.
1508 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h);
1509 }
1510
1511 ////////////////////////////////////////////////////////////////////////////
1512 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1513 ///
1514 /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1515 /// method with these types. See previous overload for more information.
1516 /// \tparam T The type of the object to fill. Automatically deduced.
1517 /// \param[in] model The model to be considered to build the new return value.
1518 /// \param[in] columnList The name of the columns read to fill the object.
1519 /// \return the filled object wrapped in a `RResultPtr`.
1520 ///
1521 /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1522 /// previous overload. Check the previous overload for more details on `Fill`.
1523 ///
1524 /// ### Example usage:
1525 /// ~~~{.cpp}
1526 /// MyClass obj;
1527 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
1528 /// ~~~
1529 ///
1530 template <typename T>
1531 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &bl)
1532 {
// `model` is a forwarding reference: when the caller passes a named object (as in the documented example)
// T is deduced as an lvalue reference. std::decay_t strips the reference/cv-qualifiers so that the stored
// object, the HistoUtils lookup and the returned RResultPtr all use the plain object type.
// For rvalue arguments std::decay_t<T> is T itself, so existing callers are unaffected.
1533 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
// Models for which HasAxisLimits() is false are rejected up front.
1534 if (!RDFInternal::HistoUtils<std::decay_t<T>>::HasAxisLimits(*h)) {
1535 throw std::runtime_error("The absence of axes limits is not supported yet.");
1536 }
// Column types are inferred: the number of columns is passed explicitly because it cannot be
// derived from a template parameter pack here.
1537 return CreateAction<RDFInternal::ActionTags::Fill, RDFDetail::RInferredType>(bl, h, bl.size());
1538 }
1539
1540 ////////////////////////////////////////////////////////////////////////////
1541 /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1542 ///
1543 /// \tparam V The type of the value column
1544 /// \param[in] value The name of the column with the values to fill the statistics with.
1545 /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1546 ///
1547 /// ### Example usage:
1548 /// ~~~{.cpp}
1549 /// // Deduce column type (this invocation needs jitting internally)
1550 /// auto stats0 = myDf.Stats("values");
1551 /// // Explicit column type
1552 /// auto stats1 = myDf.Stats<float>("values");
1553 /// ~~~
1554 ///
1555 template<typename V = RDFDetail::RInferredType>
1557 {
// An empty `value` leaves the list empty; GetValidatedColumnNames is then asked for exactly one column.
1558 ColumnNames_t columns;
1559 if (!value.empty()) {
1560 columns.emplace_back(std::string(value));
1561 }
1562 const auto validColumnNames = GetValidatedColumnNames(1, columns);
// The condition is a compile-time constant, but this is a regular `if`: both Fill calls are
// instantiated for every V, and the branch is selected at run time.
1563 if (std::is_same<V, RDFDetail::RInferredType>::value) {
1564 return Fill(TStatistic(), validColumnNames);
1565 }
1566 else {
1567 return Fill<V>(TStatistic(), validColumnNames);
1568 }
1569 }
1570
1571 ////////////////////////////////////////////////////////////////////////////
1572 /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1573 ///
1574 /// \tparam V The type of the value column
1575 /// \tparam W The type of the weight column
1576 /// \param[in] value The name of the column with the values to fill the statistics with.
1577 /// \param[in] weight The name of the column with the weights to fill the statistics with.
1578 /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1579 ///
1580 /// ### Example usage:
1581 /// ~~~{.cpp}
1582 /// // Deduce column types (this invocation needs jitting internally)
1583 /// auto stats0 = myDf.Stats("values", "weights");
1584 /// // Explicit column types
1585 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
1586 /// ~~~
1587 ///
1588 template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1590 {
1591 ColumnNames_t columns {std::string(value), std::string(weight)};
1592 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
1593 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
1594 const auto validColumnNames = GetValidatedColumnNames(2, columns);
1595 // We have 3 cases:
1596 // 1. Both types are inferred: we use Fill and let the jit kick in.
1597 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
1598 // 3. Both types are explicit: we invoke the fully compiled Fill method.
1599 if (vIsInferred && wIsInferred) {
1600 return Fill(TStatistic(), validColumnNames);
1601 } else if (vIsInferred != wIsInferred) {
// Exactly one of the two types is inferred. If the value type is the inferred one, then the
// weight type is the explicit one (and vice versa): name the columns accordingly in the message.
1602 std::string error("The ");
1603 error += vIsInferred ? "weight " : "value ";
1604 error += "column type is explicit, while the ";
1605 error += vIsInferred ? "value " : "weight ";
1606 error += "column type is specified to be inferred. This case is not supported: please specify both types or none.";
1607 throw std::runtime_error(error);
1608 } else {
1609 return Fill<V, W>(TStatistic(), validColumnNames);
1610 }
1611 }
1612
1613 ////////////////////////////////////////////////////////////////////////////
1614 /// \brief Return the minimum of processed column values (*lazy action*)
1615 /// \tparam T The type of the branch/column.
1616 /// \param[in] columnName The name of the branch/column to be treated.
1617 /// \return the minimum value of the selected column wrapped in a `RResultPtr`.
1618 ///
1619 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1620 /// template specialization of this method.
1621 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1622 ///
1623 /// This action is *lazy*: upon invocation of this method the calculation is
1624 /// booked but not executed. See RResultPtr documentation.
1625 ///
1626 /// ### Example usage:
1627 /// ~~~{.cpp}
1628 /// // Deduce column type (this invocation needs jitting internally)
1629 /// auto minVal0 = myDf.Min("values");
1630 /// // Explicit column type
1631 /// auto minVal1 = myDf.Min<double>("values");
1632 /// ~~~
1633 ///
1634 template <typename T = RDFDetail::RInferredType>
1636 {
// An empty column name is forwarded as an empty column list.
1637 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1638 using RetType_t = RDFDetail::MinReturnType_t<T>;
// Start from the largest representable value of the result type.
1639 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1640 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV);
1641 }
1642
1643 ////////////////////////////////////////////////////////////////////////////
1644 /// \brief Return the maximum of processed column values (*lazy action*)
1645 /// \tparam T The type of the branch/column.
1646 /// \param[in] columnName The name of the branch/column to be treated.
1647 /// \return the maximum value of the selected column wrapped in a `RResultPtr`.
1648 ///
1649 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1650 /// template specialization of this method.
1651 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1652 ///
1653 /// This action is *lazy*: upon invocation of this method the calculation is
1654 /// booked but not executed. See RResultPtr documentation.
1655 ///
1656 /// ### Example usage:
1657 /// ~~~{.cpp}
1658 /// // Deduce column type (this invocation needs jitting internally)
1659 /// auto maxVal0 = myDf.Max("values");
1660 /// // Explicit column type
1661 /// auto maxVal1 = myDf.Max<double>("values");
1662 /// ~~~
1663 ///
1664 template <typename T = RDFDetail::RInferredType>
1666 {
// An empty column name is forwarded as an empty column list.
1667 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1668 using RetType_t = RDFDetail::MaxReturnType_t<T>;
// Start from lowest() (not min()), i.e. the most negative finite value also for floating-point types.
1669 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1670 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV);
1671 }
1672
1673 ////////////////////////////////////////////////////////////////////////////
1674 /// \brief Return the mean of processed column values (*lazy action*)
1675 /// \tparam T The type of the branch/column.
1676 /// \param[in] columnName The name of the branch/column to be treated.
1677 /// \return the mean value of the selected column wrapped in a `RResultPtr`.
1678 ///
1679 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1680 /// template specialization of this method.
1681 ///
1682 /// This action is *lazy*: upon invocation of this method the calculation is
1683 /// booked but not executed. See RResultPtr documentation.
1684 ///
1685 /// ### Example usage:
1686 /// ~~~{.cpp}
1687 /// // Deduce column type (this invocation needs jitting internally)
1688 /// auto meanVal0 = myDf.Mean("values");
1689 /// // Explicit column type
1690 /// auto meanVal1 = myDf.Mean<double>("values");
1691 /// ~~~
1692 ///
1693 template <typename T = RDFDetail::RInferredType>
1695 {
// An empty column name is forwarded as an empty column list.
1696 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The result is always stored as a double, initialised to 0, regardless of T.
1697 auto meanV = std::make_shared<double>(0);
1698 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV);
1699 }
1700
1701 ////////////////////////////////////////////////////////////////////////////
1702 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*)
1703 /// \tparam T The type of the branch/column.
1704 /// \param[in] columnName The name of the branch/column to be treated.
1705 /// \return the standard deviation value of the selected column wrapped in a `RResultPtr`.
1706 ///
1707 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1708 /// template specialization of this method.
1709 ///
1710 /// This action is *lazy*: upon invocation of this method the calculation is
1711 /// booked but not executed. See RResultPtr documentation.
1712 ///
1713 /// ### Example usage:
1714 /// ~~~{.cpp}
1715 /// // Deduce column type (this invocation needs jitting internally)
1716 /// auto stdDev0 = myDf.StdDev("values");
1717 /// // Explicit column type
1718 /// auto stdDev1 = myDf.StdDev<double>("values");
1719 /// ~~~
1720 ///
1721 template <typename T = RDFDetail::RInferredType>
1723 {
// An empty column name is forwarded as an empty column list.
1724 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The result is always stored as a double, initialised to 0, regardless of T.
1725 auto stdDeviationV = std::make_shared<double>(0);
1726 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV);
1727 }
1728
1729 // clang-format off
1730 ////////////////////////////////////////////////////////////////////////////
1731 /// \brief Return the sum of processed column values (*lazy action*)
1732 /// \tparam T The type of the branch/column.
1733 /// \param[in] columnName The name of the branch/column.
1734 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1735 /// \return the sum of the selected column wrapped in a `RResultPtr`.
1736 ///
1737 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1738 /// template specialization of this method.
1739 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1740 ///
1741 /// This action is *lazy*: upon invocation of this method the calculation is
1742 /// booked but not executed. See RResultPtr documentation.
1743 ///
1744 /// ### Example usage:
1745 /// ~~~{.cpp}
1746 /// // Deduce column type (this invocation needs jitting internally)
1747 /// auto sum0 = myDf.Sum("values");
1748 /// // Explicit column type
1749 /// auto sum1 = myDf.Sum<double>("values");
1750 /// ~~~
1751 ///
1752 template <typename T = RDFDetail::RInferredType>
1754 Sum(std::string_view columnName = "",
1755 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1756 {
// An empty column name is forwarded as an empty column list.
1757 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The result object is a copy of initValue (value-initialised SumReturnType_t<T> by default).
1758 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1759 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV);
1760 }
1761 // clang-format on
1762
1763 ////////////////////////////////////////////////////////////////////////////
1764 /// \brief Gather filtering statistics
1765 /// \return the resulting `RCutFlowReport` instance wrapped in a `RResultPtr`.
1766 ///
1767 /// Calling `Report` on the main `RDataFrame` object gathers stats for
1768 /// all named filters in the call graph. Calling this method on a
1769 /// stored chain state (i.e. a graph node different from the first) gathers
1770 /// the stats for all named filters in the chain section between the original
1771 /// `RDataFrame` and that node (included). Stats are gathered in the same
1772 /// order as the named filters have been added to the graph.
1773 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1774 /// effects cuts had.
1775 ///
1776 /// This action is *lazy*: upon invocation of
1777 /// this method the calculation is booked but not executed. See RResultPtr
1778 /// documentation.
1779 ///
1780 /// ### Example usage:
1781 /// ~~~{.cpp}
1782 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
1783 /// auto cutReport = filtered.Report();
1784 /// cutReport->Print();
1785 /// ~~~
1786 ///
1788 {
1789 bool returnEmptyReport = false;
1790 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1791 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1792 // certainly does not contain named filters.
1793 // The number 4 takes into account the implicit columns for entry and slot number
1794 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
1795 if (std::is_same<Proxied, RLoopManager>::value && fCustomColumns.GetNames().size() > 4)
1796 returnEmptyReport = true;
1797
// `rep` is the result object: it is handed both to the helper and to the returned result pointer.
1798 auto rep = std::make_shared<RCutFlowReport>();
1799 using Helper_t = RDFInternal::ReportHelper<Proxied>;
1801
// NOTE(review): the `Action_t` alias and the trailing arguments of the make_unique call below are
// not visible here - confirm against the full header.
1802 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
1804
// The loop manager is given the raw pointer; ownership of the action is moved into the result pointer.
1805 fLoopManager->Book(action.get());
1806 return MakeResultPtr(rep, *fLoopManager, std::move(action));
1807 }
1808
1809 /////////////////////////////////////////////////////////////////////////////
1810 /// \brief Returns the names of the available columns
1811 /// \return the container of column names.
1812 ///
1813 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1814 ///
1815 /// ### Example usage:
1816 /// ~~~{.cpp}
1817 /// auto colNames = d.GetColumnNames();
1818 /// // Print columns' names
1819 /// for (auto &&colName : colNames) std::cout << colName << std::endl;
1820 /// ~~~
1821 ///
1823 {
1824 ColumnNames_t allColumns;
1825
// Appends colName to the result unless IsInternalColumn() flags it.
1826 auto addIfNotInternal = [&allColumns](std::string_view colName) {
1827 if (!RDFInternal::IsInternalColumn(colName))
1828 allColumns.emplace_back(colName);
1829 };
1830
// 1) Names known to fCustomColumns, with internal ones filtered out.
1831 auto columnNames = fCustomColumns.GetNames();
1832
1833 std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
1834
// 2) Branch names of the tree, if the loop manager has one (appended without the internal-column filter).
1835 auto tree = fLoopManager->GetTree();
1836 if (tree) {
1837 auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
1838 allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1839 }
1840
// 3) Column names of the data source, if there is one (appended without the internal-column filter).
1841 if (fDataSource) {
1842 auto &dsColNames = fDataSource->GetColumnNames();
1843 allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1844 }
1845
1846 return allColumns;
1847 }
1848
1849 /////////////////////////////////////////////////////////////////////////////
1850 /// \brief Return the type of a given column as a string.
1851 /// \return the type of the required column.
1852 ///
1853 /// This is not an action nor a transformation, just a query to the RDataFrame object.
1854 ///
1855 /// ### Example usage:
1856 /// ~~~{.cpp}
1857 /// auto colType = d.GetColumnType("columnName");
1858 /// // Print column type
1859 /// std::cout << "Column columnName has type " << colType << std::endl;
1860 /// ~~~
1861 ///
1863 {
1864 const auto col = std::string(column);
1865 const bool convertVector2RVec = true;
// Look up the custom-column object for this name, or use nullptr when fCustomColumns does not know it.
1866 RDFDetail::RCustomColumnBase *customCol =
1867 fCustomColumns.HasName(column) ? fCustomColumns.GetColumns().at(col).get() : nullptr;
// NOTE(review): the beginning of the return statement is not visible here; only its last two
// arguments (customCol, convertVector2RVec) can be seen - confirm the callee against the full header.
1869 customCol, convertVector2RVec);
1870 }
1871
1872 /// \brief Returns the names of the filters created.
1873 /// \return the container of filters names.
1874 ///
1875 /// If called on a root node, all the filters in the computation graph will
1876 /// be printed. For any other node, only the filters upstream of that node.
1877 /// Filters without a name are printed as "Unnamed Filter".
1878 /// This is neither an action nor a transformation, just a query to the RDataFrame object.
1879 ///
1880 /// ### Example usage:
1881 /// ~~~{.cpp}
1882 /// auto filtNames = d.GetFilterNames();
1883 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
1884 /// ~~~
1885 ///
1886 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
1887
1888 /// \brief Returns the names of the defined columns
1889 /// \return the container of the defined column names.
1890 ///
1891 /// This is not an action nor a transformation, just a simple utility to
1892 /// get the columns names that have been defined up to the node.
1893 /// If no custom column has been defined, e.g. on a root node, it returns an
1894 /// empty collection.
1895 ///
1896 /// ### Example usage:
1897 /// ~~~{.cpp}
1898 /// auto defColNames = d.GetDefinedColumnNames();
1899 /// // Print defined columns' names
1900 /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
1901 /// ~~~
1902 ///
1904 {
1905 ColumnNames_t definedColumns;
1906
// Bind by const reference: this avoids copying the whole container when GetColumns() returns a
// reference, and is still valid (lifetime extension) should it return by value.
1907 const auto &columns = fCustomColumns.GetColumns();
1908
// Iterate by const reference so that each entry (name + pointer to the column object) is not copied.
// A column is reported only if it is neither internal nor flagged as a data-source column.
1909 for (const auto &column : columns) {
1910 if (!RDFInternal::IsInternalColumn(column.first) && !column.second->IsDataSourceColumn())
1911 definedColumns.emplace_back(column.first);
1912 }
1913
1914 return definedColumns;
1915 }
1916
1917 /// \brief Checks if a column is present in the dataset
1918 /// \return true if the column is available, false otherwise
1919 ///
1920 /// This method checks if a column is part of the input ROOT dataset, has
1921 /// been defined or can be provided by the data source.
1922 ///
1923 /// Example usage:
1924 /// ~~~{.cpp}
1925 /// ROOT::RDataFrame base(1);
1926 /// auto rdf = base.Define("definedColumn", [](){return 0;});
1927 /// rdf.HasColumn("definedColumn"); // true: we defined it
1928 /// rdf.HasColumn("rdfentry_"); // true: it's always there
1929 /// rdf.HasColumn("foo"); // false: it is not there
1930 /// ~~~
1932 {
// 1) Columns known to fCustomColumns.
1933 if (fCustomColumns.HasName(columnName))
1934 return true;
1935
// 2) Branch names, consulted only when the loop manager has a tree. The tree pointer itself is not
// needed (the names come from the loop manager), so it is tested directly instead of being bound
// to an unused local.
1936 if (fLoopManager->GetTree()) {
1937 const auto &branchNames = fLoopManager->GetBranchNames();
1938 const auto branchNamesEnd = branchNames.end();
1939 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
1940 return true;
1941 }
1942
// 3) Columns provided by the data source, if there is one.
1943 if (fDataSource && fDataSource->HasColumn(columnName))
1944 return true;
1945
1946 return false;
1947 }
1948
1949 /// \brief Gets the number of data processing slots
1950 /// \return The number of data processing slots used by this RDataFrame instance
1951 ///
1952 /// This method returns the number of data processing slots used by this RDataFrame
1953 /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
1954 ///
1955 /// Example usage:
1956 /// ~~~{.cpp}
1957 /// ROOT::EnableImplicitMT(6);
1958 /// ROOT::RDataFrame df(1);
1959 /// std::cout << df.GetNSlots() << std::endl; // prints "6"
1960 /// ~~~
1961 unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
1962
1963 /// \brief Gets the number of event loops run
1964 /// \return The number of event loops run by this RDataFrame instance
1965 ///
1966 /// This method returns the number of event loops run so far by this RDataFrame instance.
1967 ///
1968 /// Example usage:
1969 /// ~~~{.cpp}
1970 /// ROOT::RDataFrame df(1);
1971 /// std::cout << df.GetNRuns() << std::endl; // prints "0"
1972 /// df.Sum("rdfentry_").GetValue(); // trigger the event loop
1973 /// std::cout << df.GetNRuns() << std::endl; // prints "1"
1974 /// df.Sum("rdfentry_").GetValue(); // trigger another event loop
1975 /// std::cout << df.GetNRuns() << std::endl; // prints "2"
1976 /// ~~~
1977 unsigned int GetNRuns() const { return fLoopManager->GetNRuns(); }
1978
1979 // clang-format off
1980 ////////////////////////////////////////////////////////////////////////////
1981 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1982 /// \tparam F The type of the aggregator callable. Automatically deduced.
1983 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1984 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1985 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
1986 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
1987 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
1988 /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
1989 /// \return the result of the aggregation wrapped in a `RResultPtr`.
1990 ///
1991 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
1992 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
1993 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
1994 /// the value of the column columnName.
1995 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
1996 /// Otherwise the signature of aggregator must be `void(U&,T)`.
1997 ///
1998 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
1999 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2000 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2001 ///
2002 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2003 ///
2004 /// Example usage:
2005 /// ~~~{.cpp}
2006 /// auto aggregator = [](double acc, double x) { return acc * x; };
2007 /// ROOT::EnableImplicitMT();
2008 /// // If multithread is enabled, the aggregator function will be called by more threads
2009 /// // and will produce a vector of partial accumulators.
2010 /// // The merger function performs the final aggregation of these partial results.
2011 /// auto merger = [](std::vector<double> &accumulators) {
2012 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2013 /// accumulators[0] *= accumulators[i];
2014 /// }
2015 /// };
2016 ///
2017 /// // The accumulator is initialized at this value by every thread.
2018 /// double initValue = 1.;
2019 ///
2020 /// // Multiplies all elements of the column "x"
2021 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2022 /// ~~~
2023 // clang-format on
2024 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2025 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2026 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2027 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2028 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2029 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2030 {
2031 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2032 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2033 constexpr auto nColumns = ArgTypes::list_size;
2034
2035 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2036
2037 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ArgTypes());
2038
2039 auto accObjPtr = std::make_shared<U>(aggIdentity);
2040 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2041 using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
2042 auto action = std::make_unique<Action_t>(
2043 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2044 fProxiedPtr, std::move(newColumns));
2045 fLoopManager->Book(action.get());
2046 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2047 }
2048
2049 // clang-format off
2050 ////////////////////////////////////////////////////////////////////////////
2051 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
2052 /// \tparam F The type of the aggregator callable. Automatically deduced.
2053 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2054 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2055 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable
2056 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2057 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2058 /// \return the result of the aggregation wrapped in a `RResultPtr`.
2059 ///
2060 /// See previous Aggregate overload for more information.
2061 // clang-format on
2062 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2063 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2064 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2065 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2066 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2067 {
2068 static_assert(
2069 std::is_default_constructible<U>::value,
2070 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2071 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2072 }
2073
2074 // clang-format off
2075 ////////////////////////////////////////////////////////////////////////////
2076 /// \brief Book execution of a custom action using a user-defined helper object.
2077 /// \tparam ColumnTypes List of types of columns used by this action.
2078 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2079 /// \param[in] helper The Action Helper to be scheduled.
2080 /// \param[in] columns The names of the columns on which the helper acts.
2081 /// \return the result of the helper wrapped in a `RResultPtr`.
2082 ///
2083 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2084 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2085 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2086 ///
2087 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2088 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2089 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2090 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2091 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2092 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2093 /// the requested columns for the particular entry being processed.
2094 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2095 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2096 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2097 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
2098 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2099 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2100 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2101 /// method concurrently, but will always pass different 'slot' numbers.
2102 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2103 /// Result_t). The RResultPtr returned by Book will point to this object.
2104 ///
2105 /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2106 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2107 // clang-format on
2108 template <typename... ColumnTypes, typename Helper>
2109 RResultPtr<typename Helper::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
2110 {
2111 constexpr auto nColumns = sizeof...(ColumnTypes);
2112 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columns.size());
2113
2114 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2115
2116 // TODO add more static sanity checks on Helper
// Compile-time check that Helper derives *publicly* from RActionImpl<Helper>: std::is_base_of alone is also
// true for private inheritance, so the pointer-convertibility test is required as well.
2117 using AH = RDFDetail::RActionImpl<Helper>;
2118 static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
2119 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2120
2121 using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>;
// Fetch the result pointer now: `helper` is forwarded (and possibly moved from) when the action is built below.
2122 auto resPtr = helper.GetResultPtr();
2123
2124 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2126
2127 auto action = std::make_unique<Action_t>(Helper(std::forward<Helper>(helper)), validColumnNames, fProxiedPtr,
// The loop manager is given a raw pointer to the action; the owning unique_ptr is moved into MakeResultPtr.
2129 fLoopManager->Book(action.get());
2130 return MakeResultPtr(resPtr, *fLoopManager, std::move(action));
2131 }
2132
2133 ////////////////////////////////////////////////////////////////////////////
2134 /// \brief Provides a representation of the columns in the dataset
2135 /// \tparam ColumnTypes variadic list of branch/column types.
2136 /// \param[in] columnList Names of the columns to be displayed.
2137 /// \param[in] rows Number of events for each column to be displayed.
2138 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2139 ///
2140 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2141 /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2142 /// complete version through `AsString()`.
2143 ///
2144 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2145 ///
2146 /// Example usage:
2147 /// ~~~{.cpp}
2148 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2149 /// auto d1 = rdf.Display("");
2150 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2151 /// auto d2 = d.Display({"x", "y"}, 128);
2152 /// // Printing the short representations, the event loop will run
2153 /// d1->Print();
2154 /// d2->Print();
2155 /// ~~~
2156 template <typename... ColumnTypes>
2157 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2158 {
2159 CheckIMTDisabled("Display");
2160
2161 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2162 return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer);
2163 }
2164
2165 ////////////////////////////////////////////////////////////////////////////
2166 /// \brief Provides a representation of the columns in the dataset
2167 /// \param[in] columnList Names of the columns to be displayed.
2168 /// \param[in] rows Number of events for each column to be displayed.
2169 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2170 ///
2171 /// This overload automatically infers the column types.
2172 /// See the previous overloads for further details.
2173 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2174 {
2175 CheckIMTDisabled("Display");
2176 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2177 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer,
2178 columnList.size());
2179 }
2180
2181 ////////////////////////////////////////////////////////////////////////////
2182 /// \brief Provides a representation of the columns in the dataset
2183 /// \param[in] columnNameRegexp A regular expression to select the columns.
2184 /// \param[in] nRows Number of events for each column to be displayed.
2185 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2186 ///
2187 /// The existing columns are matched against the regular expression. If the string provided
2188 /// is empty, all columns are selected.
2189 /// The selected column names are then forwarded to the overload taking a list of column names.
2190 RResultPtr<RDisplay> Display(std::string_view columnNameRegexp = "", const int &nRows = 5)
2191 {
2193 columnNameRegexp, "Display");
2194 return Display(selectedColumns, nRows);
2195 }
2196
2197 ////////////////////////////////////////////////////////////////////////////
2198 /// \brief Provides a representation of the columns in the dataset
2199 /// \param[in] columnList Names of the columns to be displayed.
2200 /// \param[in] nRows Number of events for each column to be displayed.
2201 /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2202 ///
2203 /// See the previous overloads for further details.
2204 RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, const int &nRows = 5)
2205 {
2206 ColumnNames_t selectedColumns(columnList);
2207 return Display(selectedColumns, nRows);
2208 }
2209
2210private:
2212 {
2214
2215 // Entry number column: its generator receives (slot, entry) and returns the entry number unchanged.
2216 const std::string entryColName = "rdfentry_";
2217 const std::string entryColType = "ULong64_t";
2218 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
2219 using NewColEntry_t =
2220 RDFDetail::RCustomColumn<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
2221
2222 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
2223 ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2224 newCols.AddName(entryColName);
2225 newCols.AddColumn(entryColumn, entryColName);
2226
2227 // Slot number column: its generator receives the slot and returns it unchanged.
2228 const std::string slotColName = "rdfslot_";
2229 const std::string slotColType = "unsigned int";
2230 auto slotColGen = [](unsigned int slot) { return slot; };
2231 using NewColSlot_t = RDFDetail::RCustomColumn<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
2232
2233 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
2234 fLoopManager->GetNSlots(), newCols);
2235 newCols.AddName(slotColName);
2236 newCols.AddColumn(slotColumn, slotColName);
2237
// Install the two columns as this node's booked custom columns.
2238 fCustomColumns = std::move(newCols);
2239
// Also expose both columns under the "tdf"-prefixed names: each one is registered as an alias with the loop
// manager and added to the list of known names (presumably kept for backward compatibility - confirm).
2240 fLoopManager->AddColumnAlias("tdfentry_", entryColName);
2241 fCustomColumns.AddName("tdfentry_");
2242 fLoopManager->AddColumnAlias("tdfslot_", slotColName);
2243 fCustomColumns.AddName("tdfslot_");
2244 }
2245
2246 std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
2247 {
2248 std::vector<std::string> types;
2249
2250 for (auto column : columnList) {
2251 types.push_back(GetColumnType(column));
2252 }
2253 return types;
2254 }
2255
2257 {
// The message starts with the name of the calling method and is reported as a std::runtime_error.
2259 std::string error(callerName);
2260 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
2261 throw std::runtime_error(error);
2262 }
2263 }
2265 // Type was specified by the user, no need to infer it.
// This overload is enabled (via enable_if) only when TNeedJitting<BranchTypes...>::value is false.
2266 template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2267 typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2268 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
2269 {
// One column is required per explicitly specified branch type.
2270 constexpr auto nColumns = sizeof...(BranchTypes);
2271
2272 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2273
2274 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2276
2277 const auto nSlots = fLoopManager->GetNSlots();
2278
2279 auto action = RDFInternal::BuildAction<BranchTypes...>(validColumnNames, r, nSlots, fProxiedPtr, ActionTag{},
2280 std::move(newColumns));
// The loop manager is given a raw pointer to the action; the action object itself is moved into MakeResultPtr.
2281 fLoopManager->Book(action.get());
2282 return MakeResultPtr(r, *fLoopManager, std::move(action));
2283 }
2284
2285 // User did not specify type, do type inference.
// This overload is enabled (via enable_if) only when TNeedJitting<BranchTypes...>::value is true.
2286 // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
2287 // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
2288 template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2289 typename std::enable_if<RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2291 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
2292 {
// The default nColumns == -1 means "not specified": fall back to the number of template column types.
2293 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
2294
2295 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
2296 const unsigned int nSlots = fLoopManager->GetNSlots();
2297
2298 auto tree = fLoopManager->GetTree();
2299 auto rOnHeap = RDFInternal::MakeWeakOnHeap(r);
2300
2301 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2302 using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
2303 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
2304
2305 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager);
2306 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
2307
2308 auto toJit = RDFInternal::JitBuildAction(validColumnNames, upcastNodeOnHeap,
2309 typeid(std::weak_ptr<ActionResultType>), typeid(ActionTag), rOnHeap,
2310 tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap);
// Book the RJittedAction with the loop manager and pass it the output of JitBuildAction
// (presumably the code to be just-in-time compiled later - confirm against RLoopManager::ToJitExec).
2311 fLoopManager->Book(jittedAction.get());
2312 fLoopManager->ToJitExec(toJit);
2313 return MakeResultPtr(r, *fLoopManager, jittedAction);
2314 }
2315
// Overload enabled (via enable_if) only when the callable's return type RetType is default-constructible.
2316 template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2317 typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
2318 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
2319 {
2323
// Column types are the callable's argument types, minus the leading slot argument (Slot) or the leading
// slot and entry arguments (SlotAndEntry), depending on CustomColumnType.
2324 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2325 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2326 std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
2327 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2328 std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
2329
2330 constexpr auto nColumns = ColTypes_t::list_size;
2331
2332 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2333
2334 auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
2335
2336 // Declare return type to the interpreter, for future use by jitted actions
2337 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2338 if (retTypeName.empty()) {
2339 // The type is not known to the interpreter.
2340 // We must not error out here: a placeholder type name is stored instead (presumably so that the problem
// surfaces if/when this column is used in jitted code - confirm).
2341 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2342 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2343 }
2344
2346 RDFInternal::RBookedCustomColumns newCols(newColumns);
2347 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
2348 fLoopManager->GetNSlots(), newCols);
2349
2350 newCols.AddName(name);
2351 newCols.AddColumn(newColumn, name);
2352
// The new interface refers to the same node and loop manager, but carries the extended set of custom columns.
2353 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
2354
2355 return newInterface;
2356 }
2357
2358 // This overload is chosen when the callable passed to Define or DefineSlot returns a type that is not
// default-constructible (e.g. void) and the callable itself is not convertible to std::string.
2359 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2360 // this way compilation of `Define` has no way to continue after throwing the error.
2361 template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2362 bool IsFStringConv = std::is_convertible<F, std::string>::value,
2363 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2364 typename std::enable_if<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>::type
2366 {
// The condition is false whenever this overload is selected, so the assertion always fires with this message.
2367 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2368 "Error in `Define`: type returned by expression is not default-constructible");
2369 return *this; // never reached
2370 }
2371
2372 ////////////////////////////////////////////////////////////////////////////
2373 /// \brief Implementation of snapshot
/// \tparam ColumnTypes The types of the columns to be written, one per entry of columnList.
2374 /// \param[in] treename The name of the TTree
2375 /// \param[in] filename The name of the TFile
2376 /// \param[in] columnList The list of names of the branches to be written
/// \param[in] options The snapshot options; they are passed to the snapshot helper, and `options.fLazy` is
/// forwarded to CreateSnapshotRDF.
2377 /// The implementation exploits Foreach. The association of the addresses to
2378 /// the branches takes place at the first event. This is possible because
2379 /// since there are no copies, the address of the value passed by reference
2380 /// is the address pointing to the storage of the read/created object in/by
2381 /// the TTreeReaderValue/TemporaryBranch
2382 template <typename... ColumnTypes>
2384 const ColumnNames_t &columnList, const RSnapshotOptions &options)
2385 {
2386 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size());
2387
2388 const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
2389
2390 auto newColumns = CheckAndFillDSColumns(validCols, std::index_sequence_for<ColumnTypes...>(),
2392
// Keep the full name (directory included) before `treename` is trimmed below; it is passed to CreateSnapshotRDF.
2393 const std::string fullTreename(treename);
2394 // split name into directory and treename if needed
2395 const auto lastSlash = treename.rfind('/');
2396 std::string_view dirname = "";
2397 if (std::string_view::npos != lastSlash) {
2398 dirname = treename.substr(0, lastSlash);
// The requested count exceeds what is left after the slash; substr clamps it to the end of the view.
2399 treename = treename.substr(lastSlash + 1, treename.size());
2400 }
2401
2402 // add action node to functional graph and run event loop
2403 std::unique_ptr<RDFInternal::RActionBase> actionPtr;
2405 // single-thread snapshot
2406 using Helper_t = RDFInternal::SnapshotHelper<ColumnTypes...>;
2408 actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
2409 fProxiedPtr, std::move(newColumns)));
2410 } else {
2411 // multi-thread snapshot: the helper additionally receives the number of slots
2412 using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>;
2414 actionPtr.reset(new Action_t(
2415 Helper_t(fLoopManager->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
2416 fProxiedPtr, std::move(newColumns)));
2417 }
2418
2419 fLoopManager->Book(actionPtr.get());
2420
2421 return RDFInternal::CreateSnapshotRDF(validCols, fullTreename, filename, options.fLazy, *fLoopManager,
2422 std::move(actionPtr));
2423 }
2424
2425 ////////////////////////////////////////////////////////////////////////////
2426 /// \brief Implementation of cache
2427 template <typename... BranchTypes, std::size_t... S>
2428 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...> s)
2429 {
2430 // Check at compile time that the columns types are copy constructible
2431 constexpr bool areCopyConstructible =
2432 RDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
2433 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2434
2435 // We share bits and pieces with snapshot. De facto this is a snapshot
2436 // in memory!
2437 RDFInternal::CheckTypesAndPars(sizeof...(BranchTypes), columnList.size());
2438
2439 auto colHolders = std::make_tuple(Take<BranchTypes>(columnList[S])...);
2440 auto ds = std::make_unique<RLazyDS<BranchTypes...>>(std::make_pair(columnList[S], std::get<S>(colHolders))...);
2441
2442 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnList));
2443
2444 (void)s; // Prevents unused warning
2445
2446 return cachedRDF;
2447 }
2448
2449protected:
/// Construct an interface from its parts: the proxied node pointer, the loop manager (stored by address),
/// the data-source pointer `ds` and the booked custom columns `columns`, which are stored in the
/// corresponding data members.
2450 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
2452 : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fCustomColumns(columns)
2453 {
2454 }
2455
2457
2458 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2459
2460 /// Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,
2461 /// which is expensive in terms of runtime, is called at most once.
/// \param[in] nColumns Number of columns required by the caller (e.g. the number of column types of an action).
/// \param[in] columns Column names provided by the caller; callers in this class may pass an empty list.
/// \return The validated column names.
2462 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2463 {
2465 fDataSource);
2466 }
2467
2468 template <typename... ColumnTypes, std::size_t... S>
2471 {
// With a data source present, delegate to RDFInternal::AddDSColumns; fDataSource is dereferenced only after
// the null check performed by the conditional expression.
2472 return fDataSource ? RDFInternal::AddDSColumns(validCols, fCustomColumns, *fDataSource, fLoopManager->GetNSlots(),
2473 std::index_sequence_for<ColumnTypes...>(),
2476 }
2477};
2478
2479} // end NS RDF
2480
2481} // namespace ROOT
2482
2483#endif // ROOT_RDF_TINTERFACE
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
unsigned int UInt_t
Definition: RtypesCore.h:44
unsigned long long ULong64_t
Definition: RtypesCore.h:72
const Int_t kError
Definition: TError.h:39
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
typedef void((*Func_t)())
The head node of a RDF computation graph.
const std::map< std::string, std::string > & GetAliasMap() const
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
unsigned int GetNRuns() const
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void AddColumnAlias(const std::string &alias, const std::string &colName)
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void Book(RDFInternal::RActionBase *actionPtr)
Helper class that provides the operation graph nodes.
An action node in a RDF computation graph.
Definition: RAction.hxx:217
Encapsulates the columns defined by the user.
void AddColumn(const std::shared_ptr< RDFDetail::RCustomColumnBase > &column, std::string_view name)
Internally it recreates the map with the new column, and swaps with the old one.
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
const RCustomColumnBasePtrMap_t & GetColumns() const
Returns the list of the pointers to the defined columns.
void AddName(std::string_view name)
Internally it recreates the map with the new column name, and swaps with the old one.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
virtual bool HasColumn(std::string_view) const =0
Checks if the dataset has a certain column.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:89
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action)
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RBookedCustomColumns &columns, RDataSource *ds)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:533
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action)
RLoopManager * GetLoopManager() const
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot.
Definition: RInterface.hxx:323
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>
Definition: RInterface.hxx:126
RResultPtr< T > Fill(T &&model, const ColumnNames_t &bl)
Return an object of type T on which T::Fill will be called once per event (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const int nColumns=-1)
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action)
Definition: RInterface.hxx:763
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:610
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column.
Definition: RInterface.hxx:294
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action)
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:105
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action)
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,...
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
RDFInternal::RBookedCustomColumns fCustomColumns
Contains the custom columns defined up to this node.
Definition: RInterface.hxx:108
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:228
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:559
std::enable_if<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view, F, const ColumnNames_t &)
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow referring to a column by a different name.
Definition: RInterface.hxx:402
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:598
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
Definition: RInterface.hxx:653
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", const int &nRows=5)
Provides a representation of the columns in the dataset.
RLoopManager * fLoopManager
Definition: RInterface.hxx:103
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
Definition: RInterface.hxx:96
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action)
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action)
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action)
Definition: RInterface.hxx:854
RResultPtr< T > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action)
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Creates a custom column.
Definition: RInterface.hxx:370
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:101
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:966
RDFInternal::RBookedCustomColumns CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence< S... >, TTraits::TypeList< ColumnTypes... >)
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
Implementation of snapshot.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action)
Definition: RInterface.hxx:993
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
Definition: RInterface.hxx:713
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default)
Definition: RInterface.hxx:887
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
Definition: RInterface.hxx:667
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action)
RResultPtr< typename Helper::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, const int &nRows=5)
Provides a representation of the columns in the dataset.
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:460
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:478
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Creates a custom column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:353
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action)
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:813
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action)
Definition: RInterface.hxx:733
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:248
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
std::enable_if< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > >::type DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:187
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
Definition: RInterface.hxx:836
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNRuns() const
Gets the number of event loops run.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action)
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... > s)
Implementation of cache.
RDFDetail::ColumnNames_t ColumnNames_t
Definition: RInterface.hxx:91
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:212
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end)
Definition: RInterface.hxx:691
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action)
Definition: RInterface.hxx:929
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action)
An RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:72
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTrees,...
Definition: RDataFrame.hxx:42
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
basic_string_view< char > string_view
#define F(x, y, z)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create an RResultPtr and set its pointer to the corresponding RAction. This overload is invoked by non-...
Definition: RResultPtr.hxx:346
ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branch names, including the ones of the friend trees.
void BookFilterJit(const std::shared_ptr< RJittedFilter > &jittedFilter, std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const RDFInternal::RBookedCustomColumns &customCols, TTree *tree, RDataSource *ds)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *tree, RDataSource *ds, RCustomColumnBase *customColumn, bool vector2rvec)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:211
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RBookedCustomColumns &customColumns, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: RDFUtils.cxx:84
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
ColumnNames_t ConvertRegexToColumns(const RDFInternal::RBookedCustomColumns &customColumns, TTree *tree, ROOT::RDF::RDataSource *dataSource, std::string_view columnNameRegexp, std::string_view callerName)
std::string PrettyPrintAddr(const void *const addr)
std::shared_ptr< RJittedCustomColumn > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RDFInternal::RBookedCustomColumns &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
HeadNode_t CreateSnapshotRDF(const ColumnNames_t &validCols, std::string_view treeName, std::string_view fileName, bool isLazy, RLoopManager &loopManager, std::unique_ptr< RDFInternal::RActionBase > actionPtr)
std::string JitBuildAction(const ColumnNames_t &bl, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &art, const std::type_info &at, void *rOnHeap, TTree *tree, const unsigned int nSlots, const RDFInternal::RBookedCustomColumns &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
bool IsInternalColumn(std::string_view colName)
Long64_t InterpreterCalc(const std::string &code, const std::string &context)
Definition: RDFUtils.cxx:312
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validCustomColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
void CheckCustomColumn(std::string_view definedCol, TTree *treePtr, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &dataSourceColumns)
double T(double x)
Definition: ChebyshevPol.h:34
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: StringConv.hxx:21
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:526
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:557
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:543
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1194
RooArgSet S(const RooAbsArg &v1)
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle the type id name in a portable way.
static constexpr double s
Definition: graph.py:1
Definition: tree.py:1
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:45
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:70
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a TProfile.
Definition: HistoModels.hxx:99
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25