Logo ROOT  
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/InternalTreeUtils.hxx" // for GetFileNamesFromTree and GetFriendInfo
15#include "ROOT/RDataSource.hxx"
20#include "ROOT/RDF/RDefine.hxx"
22#include "ROOT/RDF/RFilter.hxx"
24#include "ROOT/RDF/RRange.hxx"
25#include "ROOT/RDF/Utils.hxx"
26#include "ROOT/RResultPtr.hxx"
28#include "ROOT/RStringView.hxx"
29#include "ROOT/TypeTraits.hxx"
30#include "RtypesCore.h" // for ULong64_t
31#include "TChain.h" // for checking fLoopManager->GetTree() return type
32#include "TDirectory.h"
33#include "TH1.h" // For Histo actions
34#include "TH2.h" // For Histo actions
35#include "TH3.h" // For Histo actions
36#include "TProfile.h"
37#include "TProfile2D.h"
38#include "TStatistic.h"
39
40#include <algorithm>
41#include <cstddef>
42#include <initializer_list>
43#include <iterator> // std::back_inserter
44#include <limits>
45#include <memory>
46#include <set>
47#include <sstream>
48#include <stdexcept>
49#include <string>
50#include <type_traits> // is_same, enable_if
51#include <typeinfo>
52#include <utility> // std::index_sequence
53#include <vector>
54
55class TGraph;
56
57// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
58namespace ROOT {
61void EnableImplicitMT(UInt_t numthreads);
62class RDataFrame;
63namespace Internal {
64namespace RDF {
66}
67} // namespace Internal
68} // namespace ROOT
69namespace cling {
70std::string printValue(ROOT::RDataFrame *tdf);
71}
72
73namespace ROOT {
74namespace RDF {
77namespace TTraits = ROOT::TypeTraits;
78
79template <typename Proxied, typename DataSource>
80class RInterface;
81
82using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
83
84// clang-format off
85/**
86 * \class ROOT::RDF::RInterface
87 * \ingroup dataframe
88 * \brief The public interface to the RDataFrame federation of classes.
89 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
90 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
91 *
92 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
93 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
94 */
95// clang-format on
96template <typename Proxied, typename DataSource = void>
98 using DS_t = DataSource;
102 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
104
105 template <typename T, typename W>
106 friend class RInterface;
107
108 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
109 ///< The RLoopManager at the root of this computation graph. Never null.
111 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
113
114 /// Contains the columns defined up to this node.
116
117public:
118 ////////////////////////////////////////////////////////////////////////////
119 /// \brief Copy-assignment operator for RInterface.
120 RInterface &operator=(const RInterface &) = default;
121
122 ////////////////////////////////////////////////////////////////////////////
123 /// \brief Copy-ctor for RInterface.
124 RInterface(const RInterface &) = default;
125
126 ////////////////////////////////////////////////////////////////////////////
127 /// \brief Move-ctor for RInterface.
128 RInterface(RInterface &&) = default;
129
130 ////////////////////////////////////////////////////////////////////////////
131 /// \brief Only enabled when building a RInterface<RLoopManager>.
132 template <typename T = Proxied, std::enable_if_t<std::is_same<T, RLoopManager>::value, int> = 0>
133 RInterface(const std::shared_ptr<Proxied> &proxied)
134 : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource())
135 {
137 }
138
139 ////////////////////////////////////////////////////////////////////////////
140 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
141 /// Different RDataFrame methods return different C++ types. All nodes, however,
142 /// can be cast to this common type at the cost of a small performance penalty.
143 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
144 /// around via (non-template, C++11) helper functions.
145 /// Example usage:
146 /// ~~~{.cpp}
147 /// // a function that conditionally adds a Range to a RDataFrame node.
148 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
149 /// {
150 /// return mustAddRange ? df.Range(1) : df;
151 /// }
152 /// // use as :
153 /// ROOT::RDataFrame df(10);
154 /// auto maybeRanged = MaybeAddRange(df, true);
155 /// ~~~
156 /// Note that it is not a problem to pass RNode's by value.
157 operator RNode() const
158 {
159 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fDefines,
161 }
162
163 ////////////////////////////////////////////////////////////////////////////
164 /// \brief Append a filter to the call graph.
165 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
166 /// signalling whether the event has passed the selection (true) or not (false).
167 /// \param[in] columns Names of the columns/branches in input to the filter function.
168 /// \param[in] name Optional name of this filter. See `Report`.
169 /// \return the filter node of the computation graph.
170 ///
171 /// Append a filter node at the point of the call graph corresponding to the
172 /// object this method is called on.
173 /// The callable `f` should not have side-effects (e.g. modification of an
174 /// external or static variable) to ensure correct results when implicit
175 /// multi-threading is active.
176 ///
177 /// RDataFrame only evaluates filters when necessary: if multiple filters
178 /// are chained one after another, they are executed in order and the first
179 /// one returning false causes the event to be discarded.
180 /// Even if multiple actions or transformations depend on the same filter,
181 /// it is executed once per entry. If its result is requested more than
182 /// once, the cached result is served.
183 ///
184 /// ### Example usage:
185 /// ~~~{.cpp}
186 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
187 /// auto filtered = df.Filter(myCut, {"x", "y"});
188 ///
189 /// // String: it must contain valid C++ except that column names can be used instead of variable names
190 /// auto filtered = df.Filter("x*y > 0");
191 /// ~~~
192 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
194 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
195 {
196 RDFInternal::CheckFilter(f);
197 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
198 constexpr auto nColumns = ColTypes_t::list_size;
199 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
200 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
201
203
204 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fDefines, name);
205 fLoopManager->Book(filterPtr.get());
206 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fDefines, fDataSource);
207 }
208
209 ////////////////////////////////////////////////////////////////////////////
210 /// \brief Append a filter to the call graph.
211 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
212 /// signalling whether the event has passed the selection (true) or not (false).
213 /// \param[in] name Optional name of this filter. See `Report`.
214 /// \return the filter node of the computation graph.
215 ///
216 /// Refer to the first overload of this method for the full documentation.
217 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
219 {
220 // The sfinae is there in order to pick up the overloaded method which accepts two strings
221 // rather than this template method.
222 return Filter(f, {}, name);
223 }
224
225 ////////////////////////////////////////////////////////////////////////////
226 /// \brief Append a filter to the call graph.
227 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
228 /// signalling whether the event has passed the selection (true) or not (false).
229 /// \param[in] columns Names of the columns/branches in input to the filter function.
230 /// \return the filter node of the computation graph.
231 ///
232 /// Refer to the first overload of this method for the full documentation.
233 template <typename F>
234 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
235 {
 // Build a ColumnNames_t from the braced list and delegate to the Filter overload that takes
 // the column names as a ColumnNames_t; no filter name is passed along.
236 return Filter(f, ColumnNames_t{columns});
237 }
238
239 ////////////////////////////////////////////////////////////////////////////
240 /// \brief Append a filter to the call graph.
241 /// \param[in] expression The filter expression in C++
242 /// \param[in] name Optional name of this filter. See `Report`.
243 /// \return the filter node of the computation graph.
244 ///
245 /// The expression is just-in-time compiled and used to filter entries. It must
246 /// be valid C++ syntax in which variable names are substituted with the names
247 /// of branches/columns.
248 ///
249 /// ### Example usage:
250 /// ~~~{.cpp}
251 /// auto filtered_df = df.Filter("myCollection.size() > 3");
252 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
253 /// ~~~
255 {
256 // deleted by the jitted call to JitFilterHelper
257 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
258 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
259 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fDefines, fDataSource);
260 const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
261
262 RDFInternal::BookFilterJit(jittedFilter, upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
264
265 fLoopManager->Book(jittedFilter.get());
267 }
268
269 // clang-format off
270 ////////////////////////////////////////////////////////////////////////////
271 /// \brief Define a new column.
272 /// \param[in] name The name of the defined column.
273 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
274 /// \param[in] columns Names of the columns/branches in input to the producer function.
275 /// \return the first node of the computation graph for which the new quantity is defined.
276 ///
277 /// Define a column that will be visible from all subsequent nodes
278 /// of the functional chain. The `expression` is only evaluated for entries that pass
279 /// all the preceding filters.
280 /// A new variable is created called `name`, accessible as if it was contained
281 /// in the dataset from subsequent transformations/actions.
282 ///
283 /// Use cases include:
284 /// * caching the results of complex calculations for easy and efficient multiple access
285 /// * extraction of quantities of interest from complex objects
286 ///
287 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
288 ///
289 /// ### Example usage:
290 /// ~~~{.cpp}
291 /// // assuming a function with signature:
292 /// double myComplexCalculation(const RVec<float> &muon_pts);
293 /// // we can pass it directly to Define
294 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
295 /// // alternatively, we can pass the body of the function as a string, as in Filter:
296 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
297 /// ~~~
298 template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
300 {
301 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Define");
302 }
303 // clang-format on
304
305 // clang-format off
306 ////////////////////////////////////////////////////////////////////////////
307 /// \brief Define a new column with a value dependent on the processing slot.
308 /// \param[in] name The name of the defined column.
309 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
310 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
311 /// \return the first node of the computation graph for which the new quantity is defined.
312 ///
313 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
314 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
315 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
316 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
317 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
318 ///
319 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
320 /// ~~~{.cpp}
321 /// int function(unsigned int, double, double);
322 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
323 /// df.DefineSlot("x", function, {"column1", "column2"})
324 /// ~~~
325 ///
326 /// See Define for more information.
327 template <typename F>
329 {
330 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "DefineSlot");
331 }
332 // clang-format on
333
334 // clang-format off
335 ////////////////////////////////////////////////////////////////////////////
336 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
337 /// \param[in] name The name of the defined column.
338 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
339 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
340 /// \return the first node of the computation graph for which the new quantity is defined.
341 ///
342 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
343 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
344 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
345 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
346 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
347 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
348 ///
349 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
350 /// ~~~{.cpp}
351 /// int function(unsigned int, ULong64_t, double, double);
352 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
353 /// DefineSlotEntry("x", function, {"column1", "column2"})
354 /// ~~~
355 ///
356 /// See Define for more information.
357 template <typename F>
359 {
360 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
361 "DefineSlotEntry");
362 }
363 // clang-format on
364
365 ////////////////////////////////////////////////////////////////////////////
366 /// \brief Define a new column.
367 /// \param[in] name The name of the defined column.
368 /// \param[in] expression An expression in C++ which represents the defined value
369 /// \return the first node of the computation graph for which the new quantity is defined.
370 ///
371 /// The expression is just-in-time compiled and used to produce the column entries.
372 /// It must be valid C++ syntax in which variable names are substituted with the names
373 /// of branches/columns.
374 ///
375 /// Refer to the first overload of this method for the full documentation.
377 {
378 constexpr auto where = "Define";
380 // these checks must be done before jitting lest we throw exceptions in jitted code
384
385 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
386 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fDefines,
387 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
388
390 newCols.AddColumn(jittedDefine, name);
391
392 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
393
394 return newInterface;
395 }
396
397 ////////////////////////////////////////////////////////////////////////////
398 /// \brief Overwrite the value and/or type of an existing column.
399 /// \param[in] name The name of the column to redefine.
400 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
401 /// \param[in] columns Names of the columns/branches in input to the expression.
402 /// \return the first node of the computation graph for which the quantity is redefined.
403 ///
404 /// The old value of the column can be used as an input for the expression.
405 ///
406 /// An exception is thrown in case the column to redefine does not already exist.
407 /// See Define() for more information.
408 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
410 {
411 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Redefine");
412 }
413
414 // clang-format off
415 ////////////////////////////////////////////////////////////////////////////
416 /// \brief Overwrite the value and/or type of an existing column.
417 /// \param[in] name The name of the column to redefine.
418 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
419 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
420 /// \return the first node of the computation graph for which the new quantity is defined.
421 ///
422 /// The old value of the column can be used as an input for the expression.
423 /// An exception is thrown in case the column to redefine does not already exist.
424 ///
425 /// See DefineSlot() for more information.
426 // clang-format on
427 template <typename F>
429 {
430 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "RedefineSlot");
431 }
432
433 // clang-format off
434 ////////////////////////////////////////////////////////////////////////////
435 /// \brief Overwrite the value and/or type of an existing column.
436 /// \param[in] name The name of the column to redefine.
437 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
438 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
439 /// \return the first node of the computation graph for which the new quantity is defined.
440 ///
441 /// The old value of the column can be used as an input for the expression.
442 /// An exception is thrown in case the column to re-define does not already exist.
443 ///
444 /// See DefineSlotEntry() for more information.
445 // clang-format on
446 template <typename F>
448 {
449 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
450 "RedefineSlotEntry");
451 }
452
453 ////////////////////////////////////////////////////////////////////////////
454 /// \brief Overwrite the value and/or type of an existing column.
455 /// \param[in] name The name of the column to redefine.
456 /// \param[in] expression An expression in C++ which represents the defined value
457 /// \return the first node of the computation graph for which the new quantity is defined.
458 ///
459 /// The expression is just-in-time compiled and used to produce the column entries.
460 /// It must be valid C++ syntax in which variable names are substituted with the names
461 /// of branches/columns.
462 ///
463 /// The old value of the column can be used as an input for the expression.
464 /// An exception is thrown in case the column to re-define does not already exist.
465 ///
466 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
468 {
469 constexpr auto where = "Redefine";
474
475 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
476 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fDefines,
477 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
478
480 newCols.AddColumn(jittedDefine, name);
481
482 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
483
484 return newInterface;
485 }
486
487 ////////////////////////////////////////////////////////////////////////////
488 /// \brief Define a new column that is updated when the input sample changes.
489 /// \param[in] name The name of the defined column.
490 /// \param[in] expression A C++ callable that computes the new value of the defined column.
491 /// \return the first node of the computation graph for which the new quantity is defined.
492 ///
493 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
494 /// where:
495 /// - `T` is the type of the defined column
496 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
497 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
498 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
499 /// being processed (see the class docs for more information).
500 ///
501 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
502 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
503 /// starts rather than at every entry.
504 ///
505 /// ### Example usage:
506 /// ~~~{.cpp}
507 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
508 /// df.DefinePerSample("weightbysample",
509 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
510 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
511 /// ~~~
512 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
513 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
515 {
516 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
520
521 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
522 if (retTypeName.empty()) {
523 // The type is not known to the interpreter.
524 // We must not error out here; instead a placeholder type name is recorded below, and the problem is presumably reported only if/when this column is used in jitted code.
525 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
526 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
527 }
528
529 auto newColumn = std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression),
531
532 auto updateDefinePerSample = [newColumn](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
533 newColumn->Update(slot, id);
534 };
535 fLoopManager->AddSampleCallback(std::move(updateDefinePerSample));
536
538 newCols.AddColumn(std::move(newColumn), name);
540 return newInterface;
541 }
542
543 ////////////////////////////////////////////////////////////////////////////
544 /// \brief Define a new column that is updated when the input sample changes.
545 /// \param[in] name The name of the defined column.
546 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
547 /// \return the first node of the computation graph for which the new quantity is defined.
548 ///
549 /// The expression is just-in-time compiled and used to produce the column entries.
550 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
551 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
552 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
553 ///
554 /// ### Example usage:
555 /// ~~~{.py}
556 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
557 /// df.DefinePerSample("weightbysample", "rdfsampleinfo_.Contains('sample1') ? 1.0f : 2.0f")
558 /// ~~~
559 ///
560 /// \note
561 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
562 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
563 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
564 /// ~~~{.py}
565 /// ROOT.gInterpreter.Declare(
566 /// """
567 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
568 /// return id.Contains("sample1") ? 1.0f : 2.0f;
569 /// }
570 /// """)
571 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
572 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
573 /// ~~~
575 {
576 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
577 // these checks must be done before jitting lest we throw exceptions in jitted code
581
582 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
583 auto jittedDefine =
584 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fDefines, upcastNodeOnHeap);
585 auto updateDefinePerSample = [jittedDefine](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
586 jittedDefine->Update(slot, id);
587 };
588 fLoopManager->AddSampleCallback(std::move(updateDefinePerSample));
589
591 newCols.AddColumn(jittedDefine, name);
592
593 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
594
595 return newInterface;
596 }
597
598 ////////////////////////////////////////////////////////////////////////////
599 /// \brief Allow referring to a column by a different name.
600 /// \param[in] alias Name of the column alias.
601 /// \param[in] columnName Name of the column to be aliased.
602 /// \return the first node of the computation graph for which the alias is available.
603 ///
604 /// Aliasing an alias is supported.
605 ///
606 /// ### Example usage:
607 /// ~~~{.cpp}
608 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
609 /// ~~~
611 {
612 // The symmetry with Define is clear. We want to:
613 // - Create globally the alias and return this very node, unchanged
614 // - Make aliases accessible based on chains and not globally
615
616 // Helper to find out if a name is a column
617 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
618
619 constexpr auto where = "Alias";
621 // If the alias name is a column name, there is a problem
623 fLoopManager->GetBranchNames(), dsColumnNames);
624
625 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
626
627 fLoopManager->AddColumnAlias(std::string(alias), validColumnName);
628
630
631 newCols.AddName(alias);
632 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
633
634 return newInterface;
635 }
636
637 ////////////////////////////////////////////////////////////////////////////
638 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
639 /// \tparam ColumnTypes variadic list of branch/column types.
640 /// \param[in] treename The name of the output TTree.
641 /// \param[in] filename The name of the output TFile.
642 /// \param[in] columnList The list of names of the columns/branches to be written.
643 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
644 /// \return a `RDataFrame` that wraps the snapshotted dataset.
645 ///
646 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
647 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
648 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
649 /// written out and it appears before the array in the columnList.
650 ///
651 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
652 /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
653 /// present, by default all friend top-level branches that have names that do not collide with
654 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
655 /// friend branches that have the same names as branches in the main TTree/TChain, using names of the form
656 /// '<friendname>_<branchname>' in order to differentiate them from the branches in the main tree/chain.
657 ///
658 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
659 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
660 /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
661 /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
662 /// error out if such a "shuffled" TTree is used in a friendship.
663 ///
664 /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
665 /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
666 /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
667 ///
668 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
669 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
670 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
671 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
672 ///
673 /// ### Example invocations:
674 ///
675 /// ~~~{.cpp}
676 /// // without specifying template parameters (column types automatically deduced)
677 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
678 ///
679 /// // specifying template parameters ("x" is `int`, "y" is `float`)
680 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
681 /// ~~~
682 ///
683 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
684 /// `RSnapshotOptions`:
685 /// ~~~{.cpp}
686 /// RSnapshotOptions opts;
687 /// opts.fLazy = true;
688 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
689 /// ~~~
690 template <typename... ColumnTypes>
// NOTE(review): the embedded listing numbers jump from 690 to 692, so one line of this
// declaration (presumably the return type) is not visible here -- confirm against the header.
692 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
693 const RSnapshotOptions &options = RSnapshotOptions())
694 {
// Thin wrapper: every argument is passed through unchanged to SnapshotImpl, which is
// instantiated with the column types given explicitly by the caller.
695 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
696 }
697
698 ////////////////////////////////////////////////////////////////////////////
699 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
700 /// \param[in] treename The name of the output TTree.
701 /// \param[in] filename The name of the output TFile.
702 /// \param[in] columnList The list of names of the columns/branches to be written.
703 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
704 /// \return a `RDataFrame` that wraps the snapshotted dataset.
705 ///
706 /// This function returns a `RDataFrame` built with the output tree as a source.
707 /// The types of the columns are automatically inferred and do not need to be specified.
708 ///
709 /// See above for a more complete description and example usages.
// NOTE(review): listing line 710 (the start of this signature) is not visible here; the
// `treename` and `filename` names used in the body are declared on it.
711 const ColumnNames_t &columnList,
712 const RSnapshotOptions &options = RSnapshotOptions())
713 {
// The requested names first go through FilterArraySizeColNames, then through
// GetValidatedColumnNames (both defined elsewhere). The filtered list is stored in the helper
// arguments; the validated list is what the action and the returned RDataFrame receive.
714 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
715 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
// NOTE(review): listing line 716 is not visible here.
717
// Keep the name exactly as passed in before `treename` is overwritten with the tree-name
// component of the parsed path; the directory component is kept separately.
718 const auto fullTreeName = treename;
719 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
720 treename = parsedTreePath.fTreeName;
721 const auto &dirname = parsedTreePath.fDirName;
722
// Bundle file name, directory, tree name, filtered column list and options for the action.
723 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
724 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
725
// NOTE(review): listing line 726 is not visible here.
// The RDataFrame wrapped by the result is constructed from the full tree name and the output file.
727 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
728
729 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
730 validCols, newRDF, snapHelperArgs, validCols.size());
731
// Unless options.fLazy is set, the result is dereferenced right away. Per the Snapshot
// documentation, setting fLazy is how a Snapshot is booked without triggering the event loop.
732 if (!options.fLazy)
733 *resPtr;
734 return resPtr;
735 }
736
737 // clang-format off
738 ////////////////////////////////////////////////////////////////////////////
739 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
740 /// \param[in] treename The name of the output TTree.
741 /// \param[in] filename The name of the output TFile.
742 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
743 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
744 /// \return a `RDataFrame` that wraps the snapshotted dataset.
745 ///
746 /// This function returns a `RDataFrame` built with the output tree as a source.
747 /// The types of the columns are automatically inferred and do not need to be specified.
748 ///
749 /// See above for a more complete description and example usages.
// NOTE(review): listing line 750 (the start of this signature) is not visible here; the
// `treename` and `filename` names used in the body are declared on it.
751 std::string_view columnNameRegexp = "",
752 const RSnapshotOptions &options = RSnapshotOptions())
753 {
// Candidate names come from three places: columns defined on this node, top-level branches
// of the input tree (when there is one) and columns of the data source (when there is one).
754 const auto definedColumns = fDefines.GetNames();
755 auto *tree = fLoopManager->GetTree();
756 const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
757 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
758 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
// The prefix "R_rdf_sizeof_" is 13 characters long, hence the literal 13 in the check below.
759 ColumnNames_t dsColumnsWithoutSizeColumns;
760 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
761 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
// Concatenate the three lists (data-source columns in their filtered form) in one allocation.
762 ColumnNames_t columnNames;
763 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
764 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
765 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
766 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
// Reduce the candidates to those selected by the regular expression, then delegate to
// another Snapshot overload with the resulting list of names.
767 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
768 return Snapshot(treename, filename, selectedColumns, options);
769 }
770 // clang-format on
771
772 // clang-format off
773 ////////////////////////////////////////////////////////////////////////////
774 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
775 /// \param[in] treename The name of the output TTree.
776 /// \param[in] filename The name of the output TFile.
777 /// \param[in] columnList The list of names of the columns/branches to be written.
778 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
779 /// \return a `RDataFrame` that wraps the snapshotted dataset.
780 ///
781 /// This function returns a `RDataFrame` built with the output tree as a source.
782 /// The types of the columns are automatically inferred and do not need to be specified.
783 ///
784 /// See above for a more complete description and example usages.
// NOTE(review): listing line 785 (the start of this signature) is not visible here; the
// `treename` and `filename` names used in the body are declared on it.
786 std::initializer_list<std::string> columnList,
787 const RSnapshotOptions &options = RSnapshotOptions())
788 {
// Copy the braced list into a ColumnNames_t and delegate to another Snapshot overload.
789 ColumnNames_t selectedColumns(columnList);
790 return Snapshot(treename, filename, selectedColumns, options);
791 }
792 // clang-format on
793
794 ////////////////////////////////////////////////////////////////////////////
795 /// \brief Save selected columns in memory.
796 /// \tparam ColumnTypes variadic list of branch/column types.
797 /// \param[in] columnList columns to be cached in memory.
798 /// \return a `RDataFrame` that wraps the cached dataset.
799 ///
800 /// This action returns a new `RDataFrame` object, completely detached from
801 /// the originating `RDataFrame`. The new dataframe only contains the cached
802 /// columns and stores their content in memory for fast, zero-copy subsequent access.
803 ///
804 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
805 /// fits in memory and that will be accessed many times.
806 ///
807 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
808 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
809 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
810 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
811 ///
812 /// ### Example usage:
813 ///
814 /// **Types and columns specified:**
815 /// ~~~{.cpp}
816 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
817 /// ~~~
818 ///
819 /// **Types inferred and columns specified (this invocation relies on jitting):**
820 /// ~~~{.cpp}
821 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
822 /// ~~~
823 ///
824 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
825 /// ~~~{.cpp}
826 /// auto cache_all_cols_df = df.Cache(myRegexp);
827 /// ~~~
828 template <typename... ColumnTypes>
// NOTE(review): listing line 829 (the signature) is not visible here; `columnList` is declared on it.
830 {
// Build an index sequence with one index per explicitly specified column type and hand it,
// together with the column names, to CacheImpl.
831 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
832 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
833 }
834
835 ////////////////////////////////////////////////////////////////////////////
836 /// \brief Save selected columns in memory.
837 /// \param[in] columnList columns to be cached in memory
838 /// \return a `RDataFrame` that wraps the cached dataset.
839 ///
840 /// See the previous overloads for more information.
// NOTE(review): listing line 841 (the signature) is not visible here; `columnList` is declared on it.
842 {
843 // Early return: if the list of columns is empty, just return an empty RDF
844 // If we proceed, the jitted call will not compile!
845 if (columnList.empty()) {
// The Count() result is dereferenced immediately to obtain the number of entries, which is
// then used to construct the loop manager of the RDF that is returned.
846 auto nEntries = *this->Count();
847 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
848 return emptyRDF;
849 }
850
// The rest of the function assembles, as text, a call to the typed Cache<...> overload and
// hands it to the loop manager for just-in-time compilation.
851 std::stringstream cacheCall;
852 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
853 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
// NOTE(review): listing line 854 (the remaining constructor arguments) is not visible here.
855 // build a string equivalent to
856 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
// resRDF is the object returned to the caller.
857 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
858 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
// NOTE(review): listing line 859 is not visible here; presumably it streams the address of
// resRDF as the assignment target of the generated code -- confirm against the header.
860 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
861 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
862
863 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
864
865 const auto validColumnNames =
866 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
867 const auto colTypes = GetValidatedArgTypes(validColumnNames, fDefines, fLoopManager->GetTree(), fDataSource,
868 "Cache", /*vector2rvec=*/false);
// Emit the column types as the template argument list of the generated Cache<...> call.
869 for (const auto &colType : colTypes)
870 cacheCall << colType << ", ";
871 if (!columnListWithoutSizeColumns.empty())
872 cacheCall.seekp(-2, cacheCall.cur); // step back over the trailing ", " so that it gets overwritten
// The address of the local filtered column list is embedded in the generated code, so the
// list has to stay alive until the generated code has been executed.
873 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
874 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
875
876 // book the code to jit with the RLoopManager and trigger the event loop
877 fLoopManager->ToJitExec(cacheCall.str());
878 fLoopManager->Jit();
879
880 return resRDF;
881 }
882
883 ////////////////////////////////////////////////////////////////////////////
884 /// \brief Save selected columns in memory.
885 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
886 /// \return a `RDataFrame` that wraps the cached dataset.
887 ///
888 /// The existing columns are matched against the regular expression. If the string provided
889 /// is empty, all columns are selected. See the previous overloads for more information.
891 {
892 const auto definedColumns = fDefines.GetNames();
893 auto *tree = fLoopManager->GetTree();
894 const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
895 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
896 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
897 ColumnNames_t dsColumnsWithoutSizeColumns;
898 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
899 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
900 ColumnNames_t columnNames;
901 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
902 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
903 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
904 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
905 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
906 return Cache(selectedColumns);
907 }
908
909 ////////////////////////////////////////////////////////////////////////////
910 /// \brief Save selected columns in memory.
911 /// \param[in] columnList columns to be cached in memory.
912 /// \return a `RDataFrame` that wraps the cached dataset.
913 ///
914 /// See the previous overloads for more information.
915 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
916 {
917 ColumnNames_t selectedColumns(columnList);
918 return Cache(selectedColumns);
919 }
920
921 // clang-format off
922 ////////////////////////////////////////////////////////////////////////////
923 /// \brief Creates a node that filters entries based on range: [begin, end).
924 /// \param[in] begin Initial entry number considered for this range.
925 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
926 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
927 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
928 ///
929 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
930 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
931 ///
932 /// ### Example usage:
933 /// ~~~{.cpp}
934 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
935 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
936 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
937 /// ~~~
938 // clang-format on
939 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
940 {
941 // check invariants
// end == 0 is exempt from the ordering check because it means "until the end of the dataset".
// NOTE(review): a non-zero end equal to begin passes this check although the message says
// end "must be greater than begin" -- confirm whether an empty range is intended to be allowed.
942 if (stride == 0 || (end != 0 && end < begin))
943 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
// Per the Range documentation, ranges are only available when implicit multi-threading is
// not enabled; CheckIMTDisabled is defined elsewhere.
944 CheckIMTDisabled("Range");
945
// NOTE(review): listing line 946 is not visible here; presumably it declares the Range_t alias
// used on the next line -- confirm against the header.
947 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
948 fLoopManager->Book(rangePtr.get());
// NOTE(review): listing line 949 is not visible here; presumably it constructs the `tdf_r`
// object returned below -- confirm against the header.
950 return tdf_r;
951 }
952
953 // clang-format off
954 ////////////////////////////////////////////////////////////////////////////
955 /// \brief Creates a node that filters entries based on range.
956 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
957 /// \return a node of the computation graph for which the range is defined.
958 ///
959 /// See the other Range overload for a detailed description.
960 // clang-format on
961 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
962
963 // clang-format off
964 ////////////////////////////////////////////////////////////////////////////
965 /// \brief Execute a user-defined function on each entry (*instant action*).
966 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
967 /// \param[in] columns Names of the columns/branches in input to the user function.
968 ///
969 /// The callable `f` is invoked once per entry. This is an *instant action*:
970 /// upon invocation, an event loop as well as execution of all scheduled actions
971 /// is triggered.
972 /// Users are responsible for the thread-safety of this callable when executing
973 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
974 ///
975 /// ### Example usage:
976 /// ~~~{.cpp}
977 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
978 /// ~~~
979 // clang-format on
980 template <typename F>
981 void Foreach(F f, const ColumnNames_t &columns = {})
982 {
983 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
984 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
985 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
986 }
987
988 // clang-format off
989 ////////////////////////////////////////////////////////////////////////////
990 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
991 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
992 /// \param[in] columns Names of the columns/branches in input to the user function.
993 ///
994 /// Same as `Foreach`, but the user-defined function takes an extra
995 /// `unsigned int` as its first parameter, the *processing slot index*.
996 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
997 /// for each thread of execution.
998 /// This is meant as a helper in writing thread-safe `Foreach`
999 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1000 /// The user-defined processing callable is able to follow different
1001 /// *streams of processing* indexed by the first parameter.
1002 /// `ForeachSlot` works just as well with single-thread execution: in that
1003 /// case `slot` will always be `0`.
1004 ///
1005 /// ### Example usage:
1006 /// ~~~{.cpp}
1007 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1008 /// ~~~
1009 // clang-format on
1010 template <typename F>
1011 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1012 {
// The first parameter of the callable is the slot index, so it is removed from the list of
// argument types before counting how many columns the callable reads.
1013 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
1014 constexpr auto nColumns = ColTypes_t::list_size;
1015
1016 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1017 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1018
1019 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
// NOTE(review): listing line 1020 is not visible here; presumably it declares the Action_t
// alias used below -- confirm against the header.
1021
// The callable is moved into the helper; the action is registered with the loop manager
// through a raw pointer while ownership stays with the local unique_ptr.
1022 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fDefines);
1023 fLoopManager->Book(action.get());
1024
// Instant action: the event loop is run right here, while `action` is still alive.
1025 fLoopManager->Run();
1026 }
1027
1028 // clang-format off
1029 ////////////////////////////////////////////////////////////////////////////
1030 /// \brief Execute a user-defined reduce operation on the values of a column.
1031 /// \tparam F The type of the reduce callable. Automatically deduced.
1032 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1033 /// \param[in] f A callable with signature `T(T,T)`
1034 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1035 /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1036 ///
1037 /// A reduction takes two values of a column and merges them into one (e.g.
1038 /// by summing them, taking the maximum, etc). This action performs the
1039 /// specified reduction operation on all processed column values, returning
1040 /// a single value of the same type. The callable f must satisfy the general
1041 /// requirements of a *processing function* besides having signature `T(T,T)`
1042 /// where `T` is the type of column columnName.
1043 ///
1044 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1045 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1046 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1047 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1048 /// overload.
1049 ///
1050 /// ### Example usage:
1051 /// ~~~{.cpp}
1052 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1053 /// ~~~
1054 ///
1055 /// This action is *lazy*: upon invocation of this method the calculation is
1056 /// booked but not executed. Also see RResultPtr.
1057 // clang-format on
1058 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
// NOTE(review): listing line 1059 (the signature) is not visible here; `f` and `columnName`
// are declared on it.
1060 {
// This overload uses T() as the initial value, so T must be default-constructible; otherwise
// compilation stops here with a message pointing to the overload taking `redIdentity`.
1061 static_assert(
1062 std::is_default_constructible<T>::value,
1063 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
// Delegate to the overload taking an explicit initial value.
1064 return Reduce(std::move(f), columnName, T());
1065 }
1066
1067 ////////////////////////////////////////////////////////////////////////////
1068 /// \brief Execute a user-defined reduce operation on the values of a column.
1069 /// \tparam F The type of the reduce callable. Automatically deduced.
1070 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1071 /// \param[in] f A callable with signature `T(T,T)`
1072 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1073 /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
1074 /// \return the reduced quantity wrapped in a RResultPtr.
1075 ///
1076 /// ### Example usage:
1077 /// ~~~{.cpp}
1078 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1079 /// ~~~
1080 /// See the description of the first Reduce overload for more information.
1081 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1082 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1083 {
1084 return Aggregate(f, f, columnName, redIdentity);
1085 }
1086
1087 ////////////////////////////////////////////////////////////////////////////
1088 /// \brief Return the number of entries processed (*lazy action*).
1089 /// \return the number of entries wrapped in a RResultPtr.
1090 ///
1091 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1092 /// This action is *lazy*: upon invocation of this method the calculation is
1093 /// booked but not executed. Also see RResultPtr.
1094 ///
1095 /// ### Example usage:
1096 /// ~~~{.cpp}
1097 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1098 /// ~~~
1099 ///
// NOTE(review): listing line 1100 (the signature) is not visible here.
1101 {
1102 const auto nSlots = fLoopManager->GetNSlots();
// Shared counter, starting at zero; it is given both to the helper and to the result pointer.
1103 auto cSPtr = std::make_shared<ULong64_t>(0);
1104 using Helper_t = RDFInternal::CountHelper;
// NOTE(review): listing line 1105 is not visible here; presumably it declares the Action_t
// alias used below -- confirm against the header.
// Counting reads no column, hence the empty list of column names.
1106 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
// NOTE(review): listing line 1107 (the remaining constructor arguments) is not visible here.
// The action is registered through a raw pointer; ownership is then moved into the result.
1108 fLoopManager->Book(action.get());
1109 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1110 }
1111
1112 ////////////////////////////////////////////////////////////////////////////
1113 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1114 /// \tparam T The type of the column.
1115 /// \tparam COLL The type of collection used to store the values.
1116 /// \param[in] column The name of the column to collect the values of.
1117 /// \return the content of the selected column wrapped in a RResultPtr.
1118 ///
1119 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1120 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1121 /// ### Example usage:
1122 /// ~~~{.cpp}
1123 /// // In this case intCol is a std::vector<int>
1124 /// auto intCol = rdf.Take<int>("integerColumn");
1125 /// // Same content as above but in this case taken as a RVec<int>
1126 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1127 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1128 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1129 /// ~~~
1130 /// This action is *lazy*: upon invocation of this method the calculation is
1131 /// booked but not executed. Also see RResultPtr.
1132 template <typename T, typename COLL = std::vector<T>>
// NOTE(review): listing line 1133 (the signature) is not visible here; `column` is declared on it.
1134 {
// An empty column name results in an empty list being handed to the validation step.
1135 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1136
// Exactly one column is expected.
1137 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1138 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1139
1140 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
// NOTE(review): listing line 1141 is not visible here; presumably it declares the Action_t
// alias used below -- confirm against the header.
// The collection is created empty here and shared between the helper and the result pointer.
1142 auto valuesPtr = std::make_shared<COLL>();
1143 const auto nSlots = fLoopManager->GetNSlots();
1144
// The action is registered through a raw pointer; ownership is then moved into the result.
1145 auto action = std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fDefines);
1146 fLoopManager->Book(action.get());
1147 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1148 }
1149
1150 ////////////////////////////////////////////////////////////////////////////
1151 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1152 /// \tparam V The type of the column used to fill the histogram.
1153 /// \param[in] model The returned histogram will be constructed using this as a model.
1154 /// \param[in] vName The name of the column that will fill the histogram.
1155 /// \return the monodimensional histogram wrapped in a RResultPtr.
1156 ///
1157 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1158 /// is filled with each one of the elements of the container. In case multiple columns of container type
1159 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1160 /// possibly different lengths between events).
1161 /// This action is *lazy*: upon invocation of this method the calculation is
1162 /// booked but not executed. Also see RResultPtr.
1163 ///
1164 /// ### Example usage:
1165 /// ~~~{.cpp}
1166 /// // Deduce column type (this invocation needs jitting internally)
1167 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1168 /// // Explicit column type
1169 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1170 /// ~~~
1171 ///
1172 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1173 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1174 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1175 template <typename V = RDFDetail::RInferredType>
1176 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1177 {
1178 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1179
1180 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1181
1182 std::shared_ptr<::TH1D> h(nullptr);
1183 {
1184 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1185 h = model.GetHistogram();
1186 h->SetDirectory(nullptr);
1187 }
1188
1189 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1190 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1191 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h);
1192 }
1193
1194 ////////////////////////////////////////////////////////////////////////////
1195 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1196 /// \tparam V The type of the column used to fill the histogram.
1197 /// \param[in] vName The name of the column that will fill the histogram.
1198 /// \return the monodimensional histogram wrapped in a RResultPtr.
1199 ///
1200 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1201 /// The "name" and "title" strings are built starting from the input column name.
1202 /// See the description of the first Histo1D() overload for more details.
1203 ///
1204 /// ### Example usage:
1205 /// ~~~{.cpp}
1206 /// // Deduce column type (this invocation needs jitting internally)
1207 /// auto myHist1 = myDf.Histo1D("myColumn");
1208 /// // Explicit column type
1209 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1210 /// ~~~
1211 template <typename V = RDFDetail::RInferredType>
// NOTE(review): listing line 1212 (the signature) is not visible here; `vName` is declared on it.
1213 {
// The column name doubles as histogram name; the title string is "<name>;<name>;count".
1214 const auto h_name = std::string(vName);
1215 const auto h_title = h_name + ";" + h_name + ";count";
// Delegate to the overload taking a model, built here with 128 bins and 0., 0. as axis limits.
1216 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1217 }
1218
1219 ////////////////////////////////////////////////////////////////////////////
1220 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1221 /// \tparam V The type of the column used to fill the histogram.
1222 /// \tparam W The type of the column used as weights.
1223 /// \param[in] model The returned histogram will be constructed using this as a model.
1224 /// \param[in] vName The name of the column that will fill the histogram.
1225 /// \param[in] wName The name of the column that will provide the weights.
1226 /// \return the monodimensional histogram wrapped in a RResultPtr.
1227 ///
1228 /// See the description of the first Histo1D() overload for more details.
1229 ///
1230 /// ### Example usage:
1231 /// ~~~{.cpp}
1232 /// // Deduce column type (this invocation needs jitting internally)
1233 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1234 /// // Explicit column type
1235 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1236 /// ~~~
1237 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 1238 (the signature) is not visible here; `model`, `vName` and
// `wName` are declared on it.
1239 {
// If either of the two names is empty, an empty column list is passed on; otherwise the
// list holds the value column followed by the weight column.
1240 const std::vector<std::string_view> columnViews = {vName, wName};
1241 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1242 ? ColumnNames_t()
1243 : ColumnNames_t(columnViews.begin(), columnViews.end());
1244 std::shared_ptr<::TH1D> h(nullptr);
1245 {
// The guard object is alive only while the histogram is created from the model.
1246 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1247 h = model.GetHistogram();
1248 }
// NOTE(review): unlike the unweighted Histo1D(model, vName) overload, this one does not call
// GetValidatedColumnNames, SetDirectory(nullptr) or SetCanExtendAllAxes before booking the
// action -- confirm that the difference is intended.
1249 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h);
1250 }
1251
1252 ////////////////////////////////////////////////////////////////////////////
1253 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1254 /// \tparam V The type of the column used to fill the histogram.
1255 /// \tparam W The type of the column used as weights.
1256 /// \param[in] vName The name of the column that will fill the histogram.
1257 /// \param[in] wName The name of the column that will provide the weights.
1258 /// \return the monodimensional histogram wrapped in a RResultPtr.
1259 ///
1260 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1261 /// The "name" and "title" strings are built starting from the input column names.
1262 /// See the description of the first Histo1D() overload for more details.
1263 ///
1264 /// ### Example usage:
1265 /// ~~~{.cpp}
1266 /// // Deduce column types (this invocation needs jitting internally)
1267 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1268 /// // Explicit column types
1269 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1270 /// ~~~
1271 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 1272 (the signature) is not visible here; `vName` and `wName`
// are declared on it.
1273 {
1274 // We build name and title based on the value and weight column names
1275 std::string str_vName{vName};
1276 std::string str_wName{wName};
// Name: "<value>_weighted_<weight>". Title string:
// "<value>, weights: <weight>;<value>;count * <weight>".
1277 const auto h_name = str_vName + "_weighted_" + str_wName;
1278 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
// Delegate to the overload taking a model, built here with 128 bins and 0., 0. as axis limits.
1279 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1280 }
1281
1282 ////////////////////////////////////////////////////////////////////////////
1283 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1284 /// \tparam V The type of the column used to fill the histogram.
1285 /// \tparam W The type of the column used as weights.
1286 /// \param[in] model The returned histogram will be constructed using this as a model.
1287 /// \return the monodimensional histogram wrapped in a RResultPtr.
1288 ///
1289 /// This overload will use the first two default columns as column names.
1290 /// See the description of the first Histo1D() overload for more details.
1291 template <typename V, typename W>
1292 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1293 { // No column names given: forward to the three-argument form with both names left empty.
1294 return Histo1D<V, W>(model, "", "");
1295 }
1296
1297 ////////////////////////////////////////////////////////////////////////////
1298 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1299 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1300 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1301 /// \param[in] model The returned histogram will be constructed using this as a model.
1302 /// \param[in] v1Name The name of the column that will fill the x axis.
1303 /// \param[in] v2Name The name of the column that will fill the y axis.
1304 /// \return the bidimensional histogram wrapped in a RResultPtr.
1305 ///
1306 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1307 /// is filled with each one of the elements of the container. In case multiple columns of container type
1308 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1309 /// possibly different lengths between events).
1310 /// This action is *lazy*: upon invocation of this method the calculation is
1311 /// booked but not executed. Also see RResultPtr.
1312 ///
1313 /// ### Example usage:
1314 /// ~~~{.cpp}
1315 /// // Deduce column types (this invocation needs jitting internally)
1316 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1317 /// // Explicit column types
1318 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1319 /// ~~~
1320 ///
1321 ///
1322 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1323 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1324 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1325 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1327 { // Books a Histo2D action filling a TH2D built from `model` with the v1Name/v2Name columns.
1328 std::shared_ptr<::TH2D> h(nullptr);
1329 { // inner scope: `iel` only lives for the GetHistogram() call
1330 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // NOTE(review): presumably mutes messages below kError meanwhile -- confirm
1331 h = model.GetHistogram();
1332 }
1333 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { // models without axis limits are rejected before anything is booked
1334 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1335 }
1336 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1337 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1338 ? ColumnNames_t()
1339 : ColumnNames_t(columnViews.begin(), columnViews.end());
1340 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h); // same pointer passed for both trailing arguments
1341 }
1342
1343 ////////////////////////////////////////////////////////////////////////////
1344 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1345 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1346 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1347 /// \tparam W The type of the column used for the weights of the histogram.
1348 /// \param[in] model The returned histogram will be constructed using this as a model.
1349 /// \param[in] v1Name The name of the column that will fill the x axis.
1350 /// \param[in] v2Name The name of the column that will fill the y axis.
1351 /// \param[in] wName The name of the column that will provide the weights.
1352 /// \return the bidimensional histogram wrapped in a RResultPtr.
1353 ///
1354 /// This action is *lazy*: upon invocation of this method the calculation is
1355 /// booked but not executed. Also see RResultPtr.
1356 ///
1357 /// ### Example usage:
1358 /// ~~~{.cpp}
1359 /// // Deduce column types (this invocation needs jitting internally)
1360 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1361 /// // Explicit column types
1362 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1363 /// ~~~
1364 ///
1365 /// See the documentation of the first Histo2D() overload for more details.
1366 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1367 typename W = RDFDetail::RInferredType>
1370 { // Weighted variant: books a Histo2D action on the v1Name/v2Name columns with wName as weight column.
1371 std::shared_ptr<::TH2D> h(nullptr);
1372 { // inner scope: `iel` only lives for the GetHistogram() call
1373 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // NOTE(review): presumably mutes messages below kError meanwhile -- confirm
1374 h = model.GetHistogram();
1375 }
1376 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { // models without axis limits are rejected before anything is booked
1377 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1378 }
1379 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1380 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1381 ? ColumnNames_t()
1382 : ColumnNames_t(columnViews.begin(), columnViews.end());
1383 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h); // same pointer passed for both trailing arguments
1384 }
1385
1386 template <typename V1, typename V2, typename W>
1388 { // Forwards to the weighted four-argument form with all three column names left empty.
1389 return Histo2D<V1, V2, W>(model, "", "", "");
1390 }
1391
1392 ////////////////////////////////////////////////////////////////////////////
1393 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1394 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1395 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1396 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1397 /// \param[in] model The returned histogram will be constructed using this as a model.
1398 /// \param[in] v1Name The name of the column that will fill the x axis.
1399 /// \param[in] v2Name The name of the column that will fill the y axis.
1400 /// \param[in] v3Name The name of the column that will fill the z axis.
1401 /// \return the tridimensional histogram wrapped in a RResultPtr.
1402 ///
1403 /// This action is *lazy*: upon invocation of this method the calculation is
1404 /// booked but not executed. Also see RResultPtr.
1405 ///
1406 /// ### Example usage:
1407 /// ~~~{.cpp}
1408 /// // Deduce column types (this invocation needs jitting internally)
1409 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1410 /// "myValueX", "myValueY", "myValueZ");
1411 /// // Explicit column types
1412 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1413 /// "myValueX", "myValueY", "myValueZ");
1414 /// ~~~
1415 ///
1416 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1417 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1418 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1419 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1420 typename V3 = RDFDetail::RInferredType>
1422 std::string_view v3Name = "")
1423 { // Books a Histo3D action filling a TH3D built from `model` with the v1Name/v2Name/v3Name columns.
1424 std::shared_ptr<::TH3D> h(nullptr);
1425 { // inner scope: `iel` only lives for the GetHistogram() call
1426 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // NOTE(review): presumably mutes messages below kError meanwhile -- confirm
1427 h = model.GetHistogram();
1428 }
1429 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { // models without axis limits are rejected before anything is booked
1430 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1431 }
1432 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1433 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1434 ? ColumnNames_t()
1435 : ColumnNames_t(columnViews.begin(), columnViews.end());
1436 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h); // same pointer passed for both trailing arguments
1437 }
1438
1439 ////////////////////////////////////////////////////////////////////////////
1440 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1441 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1442 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1443 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1444 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1445 /// \param[in] model The returned histogram will be constructed using this as a model.
1446 /// \param[in] v1Name The name of the column that will fill the x axis.
1447 /// \param[in] v2Name The name of the column that will fill the y axis.
1448 /// \param[in] v3Name The name of the column that will fill the z axis.
1449 /// \param[in] wName The name of the column that will provide the weights.
1450 /// \return the tridimensional histogram wrapped in a RResultPtr.
1451 ///
1452 /// This action is *lazy*: upon invocation of this method the calculation is
1453 /// booked but not executed. Also see RResultPtr.
1454 ///
1455 /// ### Example usage:
1456 /// ~~~{.cpp}
1457 /// // Deduce column types (this invocation needs jitting internally)
1458 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1459 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1460 /// // Explicit column types
1461 /// using d_t = double;
1462 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1463 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1464 /// ~~~
1465 ///
1466 ///
1467 /// See the documentation of the first Histo3D() overload for more details.
1468 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1469 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1471 std::string_view v3Name, std::string_view wName)
1472 { // Weighted variant: books a Histo3D action on the v1Name/v2Name/v3Name columns with wName as weight column.
1473 std::shared_ptr<::TH3D> h(nullptr);
1474 { // inner scope: `iel` only lives for the GetHistogram() call
1475 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // NOTE(review): presumably mutes messages below kError meanwhile -- confirm
1476 h = model.GetHistogram();
1477 }
1478 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { // models without axis limits are rejected before anything is booked
1479 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1480 }
1481 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1482 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1483 ? ColumnNames_t()
1484 : ColumnNames_t(columnViews.begin(), columnViews.end());
1485 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h); // same pointer passed for both trailing arguments
1486 }
1487
1488 template <typename V1, typename V2, typename V3, typename W>
1490 { // Forwards to the weighted five-argument form with all four column names left empty.
1491 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1492 }
1493
1494 ////////////////////////////////////////////////////////////////////////////
1495 /// \brief Fill and return a graph (*lazy action*).
1496 /// \tparam V1 The type of the column used to fill the x axis of the graph.
1497 /// \tparam V2 The type of the column used to fill the y axis of the graph.
1498 /// \param[in] v1Name The name of the column that will fill the x axis.
1499 /// \param[in] v2Name The name of the column that will fill the y axis.
1500 /// \return the graph wrapped in a RResultPtr.
1501 ///
1502 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1503 /// is filled with each one of the elements of the container.
1504 /// If multithreading is enabled, the order in which points are inserted is undefined.
1505 /// If the graph has to be drawn, it is advisable to sort it along the x axis before drawing.
1506 /// A name and a title are assigned to the graph based on the input column names.
1507 ///
1508 /// This action is *lazy*: upon invocation of this method the calculation is
1509 /// booked but not executed. Also see RResultPtr.
1510 ///
1511 /// ### Example usage:
1512 /// ~~~{.cpp}
1513 /// // Deduce column types (this invocation needs jitting internally)
1514 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1515 /// // Explicit column types
1516 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1517 /// ~~~
1518 ///
1519 /// \note Differently from other ROOT interfaces, the returned graph is not associated to gDirectory
1520 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1521 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1522 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1524 { // Books a Graph action filling a freshly created TGraph with (v1Name, v2Name) points.
1525 auto graph = std::make_shared<::TGraph>();
1526 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1527 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1528 ? ColumnNames_t()
1529 : ColumnNames_t(columnViews.begin(), columnViews.end());
1530
1531 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1532
1533 // Name, title and axis labels come from the validated columns (not the raw arguments, which may be empty when default columns are used)
1534 if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
1535 const auto g_name = validatedColumns[0] + "_vs_" + validatedColumns[1];
1536 const auto g_title = validatedColumns[0] + " vs " + validatedColumns[1];
1537 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1538 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
1539 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
1540 }
1541
1542 return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph, graph); // same pointer passed for both trailing arguments
1543 }
1544
1545 ////////////////////////////////////////////////////////////////////////////
1546 /// \brief Fill and return a one-dimensional profile (*lazy action*).
1547 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1548 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1549 /// \param[in] model The model to be considered to build the new return value.
1550 /// \param[in] v1Name The name of the column that will fill the x axis.
1551 /// \param[in] v2Name The name of the column that will fill the y axis.
1552 /// \return the monodimensional profile wrapped in a RResultPtr.
1553 ///
1554 /// This action is *lazy*: upon invocation of this method the calculation is
1555 /// booked but not executed. Also see RResultPtr.
1556 ///
1557 /// ### Example usage:
1558 /// ~~~{.cpp}
1559 /// // Deduce column types (this invocation needs jitting internally)
1560 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1561 /// // Explicit column types
1562 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1563 /// ~~~
1564 ///
1565 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
1566 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1567 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1568 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1570 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1571 { // Books a Profile1D action filling a TProfile built from `model` with the v1Name/v2Name columns.
1572 std::shared_ptr<::TProfile> h(nullptr);
1573 { // inner scope: `iel` only lives for the GetProfile() call
1574 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // NOTE(review): presumably mutes messages below kError meanwhile -- confirm
1575 h = model.GetProfile();
1576 }
1577
1578 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { // models without axis limits are rejected before anything is booked
1579 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1580 }
1581 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1582 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1583 ? ColumnNames_t()
1584 : ColumnNames_t(columnViews.begin(), columnViews.end());
1585 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h); // same pointer passed for both trailing arguments
1586 }
1587
1588 ////////////////////////////////////////////////////////////////////////////
1589 /// \brief Fill and return a one-dimensional profile (*lazy action*).
1590 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1591 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1592 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1593 /// \param[in] model The model to be considered to build the new return value.
1594 /// \param[in] v1Name The name of the column that will fill the x axis.
1595 /// \param[in] v2Name The name of the column that will fill the y axis.
1596 /// \param[in] wName The name of the column that will provide the weights.
1597 /// \return the monodimensional profile wrapped in a RResultPtr.
1598 ///
1599 /// This action is *lazy*: upon invocation of this method the calculation is
1600 /// booked but not executed. Also see RResultPtr.
1601 ///
1602 /// ### Example usage:
1603 /// ~~~{.cpp}
1604 /// // Deduce column types (this invocation needs jitting internally)
1605 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1606 /// // Explicit column types
1607 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1608 /// "xValues", "yValues", "weight");
1609 /// ~~~
1610 ///
1611 /// See the first Profile1D() overload for more details.
1612 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1613 typename W = RDFDetail::RInferredType>
1616 { // Weighted variant: books a Profile1D action on the v1Name/v2Name columns with wName as weight column.
1617 std::shared_ptr<::TProfile> h(nullptr);
1618 { // inner scope: `iel` only lives for the GetProfile() call
1619 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // NOTE(review): presumably mutes messages below kError meanwhile -- confirm
1620 h = model.GetProfile();
1621 }
1622
1623 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { // models without axis limits are rejected before anything is booked
1624 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1625 }
1626 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1627 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1628 ? ColumnNames_t()
1629 : ColumnNames_t(columnViews.begin(), columnViews.end());
1630 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h); // same pointer passed for both trailing arguments
1631 }
1632
1633 ////////////////////////////////////////////////////////////////////////////
1634 /// \brief Fill and return a one-dimensional profile (*lazy action*).
1635 /// See the first Profile1D() overload for more details.
1636 template <typename V1, typename V2, typename W>
1638 { // Forwards to the weighted four-argument form with all three column names left empty.
1639 return Profile1D<V1, V2, W>(model, "", "", "");
1640 }
1641
1642 ////////////////////////////////////////////////////////////////////////////
1643 /// \brief Fill and return a two-dimensional profile (*lazy action*).
1644 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1645 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1646 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1647 /// \param[in] model The returned profile will be constructed using this as a model.
1648 /// \param[in] v1Name The name of the column that will fill the x axis.
1649 /// \param[in] v2Name The name of the column that will fill the y axis.
1650 /// \param[in] v3Name The name of the column that will fill the z axis.
1651 /// \return the bidimensional profile wrapped in a RResultPtr.
1652 ///
1653 /// This action is *lazy*: upon invocation of this method the calculation is
1654 /// booked but not executed. Also see RResultPtr.
1655 ///
1656 /// ### Example usage:
1657 /// ~~~{.cpp}
1658 /// // Deduce column types (this invocation needs jitting internally)
1659 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1660 /// "xValues", "yValues", "zValues");
1661 /// // Explicit column types
1662 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1663 /// "xValues", "yValues", "zValues");
1664 /// ~~~
1665 ///
1666 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
1667 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1668 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1669 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1670 typename V3 = RDFDetail::RInferredType>
1672 std::string_view v2Name = "", std::string_view v3Name = "")
1673 { // Books a Profile2D action filling a TProfile2D built from `model` with the v1Name/v2Name/v3Name columns.
1674 std::shared_ptr<::TProfile2D> h(nullptr);
1675 { // inner scope: `iel` only lives for the GetProfile() call
1676 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // NOTE(review): presumably mutes messages below kError meanwhile -- confirm
1677 h = model.GetProfile();
1678 }
1679
1680 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { // models without axis limits are rejected before anything is booked
1681 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1682 }
1683 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1684 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1685 ? ColumnNames_t()
1686 : ColumnNames_t(columnViews.begin(), columnViews.end());
1687 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h); // same pointer passed for both trailing arguments
1688 }
1689
1690 ////////////////////////////////////////////////////////////////////////////
1691 /// \brief Fill and return a two-dimensional profile (*lazy action*).
1692 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1693 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1694 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1695 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1696 /// \param[in] model The returned histogram will be constructed using this as a model.
1697 /// \param[in] v1Name The name of the column that will fill the x axis.
1698 /// \param[in] v2Name The name of the column that will fill the y axis.
1699 /// \param[in] v3Name The name of the column that will fill the z axis.
1700 /// \param[in] wName The name of the column that will provide the weights.
1701 /// \return the bidimensional profile wrapped in a RResultPtr.
1702 ///
1703 /// This action is *lazy*: upon invocation of this method the calculation is
1704 /// booked but not executed. Also see RResultPtr.
1705 ///
1706 /// ### Example usage:
1707 /// ~~~{.cpp}
1708 /// // Deduce column types (this invocation needs jitting internally)
1709 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1710 /// "xValues", "yValues", "zValues", "weight");
1711 /// // Explicit column types
1712 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1713 /// "xValues", "yValues", "zValues", "weight");
1714 /// ~~~
1715 ///
1716 /// See the first Profile2D() overload for more details.
1717 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1718 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1720 std::string_view v3Name, std::string_view wName)
1721 { // Weighted variant: books a Profile2D action on the v1Name/v2Name/v3Name columns with wName as weight column.
1722 std::shared_ptr<::TProfile2D> h(nullptr);
1723 { // inner scope: `iel` only lives for the GetProfile() call
1724 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // NOTE(review): presumably mutes messages below kError meanwhile -- confirm
1725 h = model.GetProfile();
1726 }
1727
1728 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { // models without axis limits are rejected before anything is booked
1729 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1730 }
1731 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1732 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) // going by the helper's name: any empty name => pass no names at all
1733 ? ColumnNames_t()
1734 : ColumnNames_t(columnViews.begin(), columnViews.end());
1735 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h); // same pointer passed for both trailing arguments
1736 }
1737
1738 /// \brief Fill and return a two-dimensional profile (*lazy action*).
1739 /// See the first Profile2D() overload for more details.
1740 template <typename V1, typename V2, typename V3, typename W>
1742 { // Forwards to the weighted five-argument form with all four column names left empty.
1743 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1744 }
1745
1746 ////////////////////////////////////////////////////////////////////////////
1747 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
1748 ///
1749 /// Type T must provide at least:
1750 /// - a copy-constructor
1751 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
1752 /// (these types can also be passed as template parameters to this method)
1753 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
1754 /// objects passed as argument into the object on which `Merge` was called (analogous to TH1::Merge). Note that
1755 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
1756 /// the TCollection*).
1757 ///
1758 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
1759 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1760 /// \tparam T The type of the object to fill. Automatically deduced.
1761 /// \param[in] model The model to be considered to build the new return value.
1762 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1763 /// \return the filled object wrapped in a RResultPtr.
1764 ///
1765 /// The model object is copied (or moved from, if passed as an rvalue) into the returned result.
1766 /// The list of column names to be used for filling must always be specified.
1767 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1768 /// Also see RResultPtr.
1769 ///
1770 /// ### Example usage:
1771 /// ~~~{.cpp}
1772 /// MyClass obj;
1773 /// // Deduce column types (this invocation needs jitting internally, and in this case
1774 /// // MyClass needs to be known to the interpreter)
1775 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
1776 /// // explicit column types
1777 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
1778 /// ~~~
1779 ///
1780 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
1781 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
1782 { // Books a Fill action on a new object of type std::decay_t<T> constructed from `model`.
1783 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model)); // copy- or move-constructs the result object, depending on how `model` was passed
1784 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) { // NOTE(review): T is deduced as an lvalue reference for lvalue models -- confirm HistoUtils copes with reference types
1785 throw std::runtime_error("The absence of axes limits is not supported yet.");
1786 }
1787 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h,
1788 columnList.size()); // the number of columns is forwarded as an extra trailing argument
1789 }
1790
1791 ////////////////////////////////////////////////////////////////////////////
1792 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
1793 ///
1794 /// \tparam V The type of the value column
1795 /// \param[in] value The name of the column with the values to fill the statistics with.
1796 /// \return the filled TStatistic object wrapped in a RResultPtr.
1797 ///
1798 /// ### Example usage:
1799 /// ~~~{.cpp}
1800 /// // Deduce column type (this invocation needs jitting internally)
1801 /// auto stats0 = myDf.Stats("values");
1802 /// // Explicit column type
1803 /// auto stats1 = myDf.Stats<float>("values");
1804 /// ~~~
1805 ///
1806 template <typename V = RDFDetail::RInferredType>
1808 { // Books a Fill action on a fresh TStatistic using the single `value` column.
1809 ColumnNames_t columns;
1810 if (!value.empty()) { // an empty name leaves the list empty; NOTE(review): presumably a default column is then picked during validation -- confirm
1811 columns.emplace_back(std::string(value));
1812 }
1813 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1814 if (std::is_same<V, RDFDetail::RInferredType>::value) { // the caller did not spell out the column type
1815 return Fill(TStatistic(), validColumnNames); // Fill with its default (inferred) column type
1816 } else {
1817 return Fill<V>(TStatistic(), validColumnNames); // Fill with the explicit column type
1818 }
1819 }
1820
1821 ////////////////////////////////////////////////////////////////////////////
1822 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
1823 ///
1824 /// \tparam V The type of the value column
1825 /// \tparam W The type of the weight column
1826 /// \param[in] value The name of the column with the values to fill the statistics with.
1827 /// \param[in] weight The name of the column with the weights to fill the statistics with.
1828 /// \return the filled TStatistic object wrapped in a RResultPtr.
1829 ///
1830 /// ### Example usage:
1831 /// ~~~{.cpp}
1832 /// // Deduce column types (this invocation needs jitting internally)
1833 /// auto stats0 = myDf.Stats("values", "weights");
1834 /// // Explicit column types
1835 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
1836 /// ~~~
1837 ///
1838 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1840 { // Books a Fill action on a fresh TStatistic using the `value` and `weight` columns.
1841 ColumnNames_t columns{std::string(value), std::string(weight)};
1842 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
1843 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
1844 const auto validColumnNames = GetValidatedColumnNames(2, columns);
1845 // We have 3 cases:
1846 // 1. Both types are inferred: we use Fill and let the jit kick in.
1847 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
1848 // 3. Both types are explicit: we invoke the fully compiled Fill method.
1849 if (vIsInferred && wIsInferred) {
1850 return Fill(TStatistic(), validColumnNames);
1851 } else if (vIsInferred != wIsInferred) {
1852 std::string error("The ");
1853 error += vIsInferred ? "weight " : "value "; // exactly one type is inferred here, so the other column is the explicit one
1854 error += "column type is explicit, while the ";
1855 error += vIsInferred ? "value " : "weight "; // the column whose type was left to be inferred
1856 error += "is specified to be inferred. This case is not supported: please specify both types or none.";
1857 throw std::runtime_error(error);
1858 } else {
1859 return Fill<V, W>(TStatistic(), validColumnNames);
1860 }
1861 }
1862
1863 ////////////////////////////////////////////////////////////////////////////
1864 /// \brief Return the minimum of processed column values (*lazy action*).
1865 /// \tparam T The type of the branch/column.
1866 /// \param[in] columnName The name of the branch/column to be treated.
1867 /// \return the minimum value of the selected column wrapped in a RResultPtr.
1868 ///
1869 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1870 /// template specialization of this method.
1871 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1872 ///
1873 /// This action is *lazy*: upon invocation of this method the calculation is
1874 /// booked but not executed. Also see RResultPtr.
1875 ///
1876 /// ### Example usage:
1877 /// ~~~{.cpp}
1878 /// // Deduce column type (this invocation needs jitting internally)
1879 /// auto minVal0 = myDf.Min("values");
1880 /// // Explicit column type
1881 /// auto minVal1 = myDf.Min<double>("values");
1882 /// ~~~
1883 ///
1884 template <typename T = RDFDetail::RInferredType>
1886 {
1887 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1888 using RetType_t = RDFDetail::MinReturnType_t<T>;
1889 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1890 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV);
1891 }
1892
1893 ////////////////////////////////////////////////////////////////////////////
1894 /// \brief Return the maximum of processed column values (*lazy action*).
1895 /// \tparam T The type of the branch/column.
1896 /// \param[in] columnName The name of the branch/column to be treated.
1897 /// \return the maximum value of the selected column wrapped in a RResultPtr.
1898 ///
1899 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1900 /// template specialization of this method.
1901 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1902 ///
1903 /// This action is *lazy*: upon invocation of this method the calculation is
1904 /// booked but not executed. Also see RResultPtr.
1905 ///
1906 /// ### Example usage:
1907 /// ~~~{.cpp}
1908 /// // Deduce column type (this invocation needs jitting internally)
1909 /// auto maxVal0 = myDf.Max("values");
1910 /// // Explicit column type
1911 /// auto maxVal1 = myDf.Max<double>("values");
1912 /// ~~~
1913 ///
1914 template <typename T = RDFDetail::RInferredType>
1916 {
1917 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1918 using RetType_t = RDFDetail::MaxReturnType_t<T>;
1919 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1920 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV);
1921 }
1922
1923 ////////////////////////////////////////////////////////////////////////////
1924 /// \brief Return the mean of processed column values (*lazy action*).
1925 /// \tparam T The type of the branch/column.
1926 /// \param[in] columnName The name of the branch/column to be treated.
1927 /// \return the mean value of the selected column wrapped in a RResultPtr.
1928 ///
1929 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1930 /// template specialization of this method.
1931 ///
1932 /// This action is *lazy*: upon invocation of this method the calculation is
1933 /// booked but not executed. Also see RResultPtr.
1934 ///
1935 /// ### Example usage:
1936 /// ~~~{.cpp}
1937 /// // Deduce column type (this invocation needs jitting internally)
1938 /// auto meanVal0 = myDf.Mean("values");
1939 /// // Explicit column type
1940 /// auto meanVal1 = myDf.Mean<double>("values");
1941 /// ~~~
1942 ///
1943 template <typename T = RDFDetail::RInferredType>
1945 {
1946 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1947 auto meanV = std::make_shared<double>(0);
1948 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV);
1949 }
1950
1951 ////////////////////////////////////////////////////////////////////////////
1952 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
1953 /// \tparam T The type of the branch/column.
1954 /// \param[in] columnName The name of the branch/column to be treated.
1955 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
1956 ///
1957 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1958 /// template specialization of this method.
1959 ///
1960 /// This action is *lazy*: upon invocation of this method the calculation is
1961 /// booked but not executed. Also see RResultPtr.
1962 ///
1963 /// ### Example usage:
1964 /// ~~~{.cpp}
1965 /// // Deduce column type (this invocation needs jitting internally)
1966 /// auto stdDev0 = myDf.StdDev("values");
1967 /// // Explicit column type
1968 /// auto stdDev1 = myDf.StdDev<double>("values");
1969 /// ~~~
1970 ///
1971 template <typename T = RDFDetail::RInferredType>
1973 {
1974 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1975 auto stdDeviationV = std::make_shared<double>(0);
1976 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV);
1977 }
1978
1979 // clang-format off
1980 ////////////////////////////////////////////////////////////////////////////
1981 /// \brief Return the sum of processed column values (*lazy action*).
1982 /// \tparam T The type of the branch/column.
1983 /// \param[in] columnName The name of the branch/column.
1984 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1985 /// \return the sum of the selected column wrapped in a RResultPtr.
1986 ///
1987 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1988 /// template specialization of this method.
1989 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1990 ///
1991 /// This action is *lazy*: upon invocation of this method the calculation is
1992 /// booked but not executed. Also see RResultPtr.
1993 ///
1994 /// ### Example usage:
1995 /// ~~~{.cpp}
1996 /// // Deduce column type (this invocation needs jitting internally)
1997 /// auto sum0 = myDf.Sum("values");
1998 /// // Explicit column type
1999 /// auto sum1 = myDf.Sum<double>("values");
2000 /// ~~~
2001 ///
2002 template <typename T = RDFDetail::RInferredType>
2004 Sum(std::string_view columnName = "",
2005 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2006 {
2007 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2008 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2009 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV);
2010 }
2011 // clang-format on
2012
2013 ////////////////////////////////////////////////////////////////////////////
2014 /// \brief Gather filtering statistics.
2015 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2016 ///
2017 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2018 /// all named filters in the call graph. Calling this method on a
2019 /// stored chain state (i.e. a graph node different from the first) gathers
2020 /// the stats for all named filters in the chain section between the original
2021 /// `RDataFrame` and that node (included). Stats are gathered in the same
2022 /// order as the named filters have been added to the graph.
2023 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2024 /// effects cuts had.
2025 ///
2026 /// This action is *lazy*: upon invocation of
2027 /// this method the calculation is booked but not executed. See RResultPtr
2028 /// documentation.
2029 ///
2030 /// ### Example usage:
2031 /// ~~~{.cpp}
2032 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2033 /// auto cutReport = filtered.Report();
2034 /// cutReport->Print();
2035 /// ~~~
2036 ///
2038 {
2039 bool returnEmptyReport = false;
2040 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2041 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2042 // certainly does not contain named filters.
2043 // The number 4 takes into account the implicit columns for entry and slot number
2044 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2045 if (std::is_same<Proxied, RLoopManager>::value && fDefines.GetNames().size() > 4)
2046 returnEmptyReport = true;
2047
2048 auto rep = std::make_shared<RCutFlowReport>();
2049 using Helper_t = RDFInternal::ReportHelper<Proxied>;
2051
2052 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
2054
2055 fLoopManager->Book(action.get());
2056 return MakeResultPtr(rep, *fLoopManager, std::move(action));
2057 }
2058
2059 /////////////////////////////////////////////////////////////////////////////
2060 /// \brief Returns the names of the available columns.
2061 /// \return the container of column names.
2062 ///
2063 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2064 ///
2065 /// ### Example usage:
2066 /// ~~~{.cpp}
2067 /// auto colNames = d.GetColumnNames();
2068 /// // Print columns' names
2069 /// for (auto &&colName : colNames) std::cout << colName << std::endl;
2070 /// ~~~
2071 ///
2073 {
2074 ColumnNames_t allColumns;
2075
2076 auto addIfNotInternal = [&allColumns](std::string_view colName) {
2077 if (!RDFInternal::IsInternalColumn(colName))
2078 allColumns.emplace_back(colName);
2079 };
2080
2081 auto columnNames = fDefines.GetNames();
2082
2083 std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
2084
2085 auto tree = fLoopManager->GetTree();
2086 if (tree) {
2087 auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
2088 allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
2089 }
2090
2091 if (fDataSource) {
2092 const auto &dsColNames = fDataSource->GetColumnNames();
2093 // ignore columns starting with R_rdf_sizeof_
2094 std::copy_if(dsColNames.begin(), dsColNames.end(), std::back_inserter(allColumns),
2095 [](const std::string &s) { return s.rfind("R_rdf_sizeof", 0) != 0; });
2096 }
2097
2098 return allColumns;
2099 }
2100
2101 /////////////////////////////////////////////////////////////////////////////
2102 /// \brief Return the type of a given column as a string.
2103 /// \return the type of the required column.
2104 ///
2105 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2106 ///
2107 /// ### Example usage:
2108 /// ~~~{.cpp}
2109 /// auto colType = d.GetColumnType("columnName");
2110 /// // Print column type
2111 /// std::cout << "Column columnName has type " << colType << std::endl;
2112 /// ~~~
2113 ///
2115 {
2116 const auto col = RDFInternal::ResolveAlias(std::string(column), fLoopManager->GetAliasMap());
2117
2118 RDFDetail::RDefineBase *define = fDefines.HasName(col) ? fDefines.GetColumns().at(col).get() : nullptr;
2119
2120 const bool convertVector2RVec = true;
2122 convertVector2RVec);
2123 }
2124
2125 /////////////////////////////////////////////////////////////////////////////
2126 /// \brief Return information about the dataframe.
2127 /// \return information about the dataframe as string
2128 ///
2129 /// This convenience function describes the dataframe and combines the following information:
2130 /// - Information about the dataset, see DescribeDataset()
2131 /// - Number of event loops run, see GetNRuns()
2132 /// - Number of total and defined columns, see GetColumnNames() and GetDefinedColumnNames()
2133 /// - Column names, see GetColumnNames()
2134 /// - Column types, see GetColumnType()
2135 /// - Number of processing slots, see GetNSlots()
2136 ///
2137 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2138 /// The result is dependent on the node from which this method is called, e.g. the list of
2139 /// defined columns returned by GetDefinedColumnNames().
2140 ///
2141 /// Please note that this is a convenience feature and the layout of the output can be subject
2142 /// to change and should not be automatically parsed.
2143 ///
2144 /// ### Example usage:
2145 /// ~~~{.cpp}
2146 /// RDataFrame df(10);
2147 /// auto df2 = df.Define("x", "1.f").Define("s", "\"myStr\"");
2148 /// // Describe the dataframe
2149 /// std::cout << df2.Describe() << std::endl;
2150 /// ~~~
2151 ///
2152 std::string Describe()
2153 {
2154 // Put the information from DescribeDataset on the top
2155 std::stringstream ss;
2156 ss << DescribeDataset() << "\n\n";
2157
2158 // Build set of defined column names to find later in all column names
2159 // the defined columns more efficiently
2160 const auto columnNames = GetColumnNames();
2161 std::set<std::string> definedColumnNamesSet;
2162 for (const auto &name : GetDefinedColumnNames())
2163 definedColumnNamesSet.insert(name);
2164
2165 // Get information for the metadata table
2166 const std::vector<std::string> metadataProperties = {"Columns in total", "Columns from defines",
2167 "Event loops run", "Processing slots"};
2168 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
2169 std::to_string(definedColumnNamesSet.size()),
2170 std::to_string(GetNRuns()), std::to_string(GetNSlots())};
2171
2172 // Set header for metadata table
2173 const auto columnWidthProperties = RDFInternal::GetColumnWidth(metadataProperties);
2174 // The column width of the values is required to make right-bound numbers and is equal
2175 // to the maximum of the string "Value" and all values to be put in this column.
2176 const auto columnWidthValues =
2177 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), static_cast<std::size_t>(5u));
2178 ss << std::left << std::setw(columnWidthProperties) << "Property" << std::setw(columnWidthValues) << "Value\n"
2179 << std::setw(columnWidthProperties) << "--------" << std::setw(columnWidthValues) << "-----\n";
2180
2181 // Build metadata table
2182 // All numbers should be bound to the right and strings bound to the left.
2183 for (auto i = 0u; i < metadataProperties.size(); i++) {
2184 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
2185 << std::setw(columnWidthValues) << metadataValues[i] << '\n';
2186 }
2187 ss << '\n'; // put space between this and the next table
2188
2189 // Set header for columns table
2190 const auto columnWidthNames = RDFInternal::GetColumnWidth(columnNames);
2191 const auto columnTypes = GetColumnTypeNamesList(columnNames);
2192 const auto columnWidthTypes = RDFInternal::GetColumnWidth(columnTypes);
2193 ss << std::left << std::setw(columnWidthNames) << "Column" << std::setw(columnWidthTypes) << "Type"
2194 << "Origin\n"
2195 << std::setw(columnWidthNames) << "------" << std::setw(columnWidthTypes) << "----"
2196 << "------\n";
2197
2198 // Build columns table
2199 const auto nCols = columnNames.size();
2200 for (auto i = 0u; i < nCols; i++) {
2201 auto origin = "Dataset";
2202 if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
2203 origin = "Define";
2204 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes)
2205 << columnTypes[i] << origin;
2206 if (i < nCols - 1)
2207 ss << '\n';
2208 }
2209
2210 return ss.str();
2211 }
2212
2213 /// \brief Returns the names of the filters created.
2214 /// \return the container of filters names.
2215 ///
2216 /// If called on a root node, all the filters in the computation graph will
2217 /// be printed. For any other node, only the filters upstream of that node.
2218 /// Filters without a name are printed as "Unnamed Filter"
2219 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2220 ///
2221 /// ### Example usage:
2222 /// ~~~{.cpp}
2223 /// auto filtNames = d.GetFilterNames();
2224 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2225 /// ~~~
2226 ///
2227 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2228
2229 /// \brief Returns the names of the defined columns.
2230 /// \return the container of the defined column names.
2231 ///
2232 /// This is not an action nor a transformation, just a simple utility to
2233 /// get the columns names that have been defined up to the node.
2234 /// If no column has been defined, e.g. on a root node, it returns an
2235 /// empty collection.
2236 ///
2237 /// ### Example usage:
2238 /// ~~~{.cpp}
2239 /// auto defColNames = d.GetDefinedColumnNames();
2240 /// // Print defined columns' names
2241 /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
2242 /// ~~~
2243 ///
2245 {
2246 ColumnNames_t definedColumns;
2247
2248 auto columns = fDefines.GetColumns();
2249
2250 for (const auto &column : columns) {
2251 if (!RDFInternal::IsInternalColumn(column.first))
2252 definedColumns.emplace_back(column.first);
2253 }
2254
2255 return definedColumns;
2256 }
2257
2258 /// \brief Checks if a column is present in the dataset.
2259 /// \return true if the column is available, false otherwise
2260 ///
2261 /// This method checks if a column is part of the input ROOT dataset, has
2262 /// been defined or can be provided by the data source.
2263 ///
2264 /// Example usage:
2265 /// ~~~{.cpp}
2266 /// ROOT::RDataFrame base(1);
2267 /// auto rdf = base.Define("definedColumn", [](){return 0;});
2268 /// rdf.HasColumn("definedColumn"); // true: we defined it
2269 /// rdf.HasColumn("rdfentry_"); // true: it's always there
2270 /// rdf.HasColumn("foo"); // false: it is not there
2271 /// ~~~
2273 {
2274 if (fDefines.HasName(columnName))
2275 return true;
2276
2277 if (auto tree = fLoopManager->GetTree()) {
2278 const auto &branchNames = fLoopManager->GetBranchNames();
2279 const auto branchNamesEnd = branchNames.end();
2280 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
2281 return true;
2282 }
2283
2284 if (fDataSource && fDataSource->HasColumn(columnName))
2285 return true;
2286
2287 return false;
2288 }
2289
2290 /// \brief Gets the number of data processing slots.
2291 /// \return The number of data processing slots used by this RDataFrame instance
2292 ///
2293 /// This method returns the number of data processing slots used by this RDataFrame
2294 /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
2295 ///
2296 /// Example usage:
2297 /// ~~~{.cpp}
2298 /// ROOT::EnableImplicitMT(6);
2299 /// ROOT::RDataFrame df(1);
2300 /// std::cout << df.GetNSlots() << std::endl; // prints "6"
2301 /// ~~~
2302 unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
2303
2304 /// \brief Gets the number of event loops run.
2305 /// \return The number of event loops run by this RDataFrame instance
2306 ///
2307 /// This method returns the number of events loops run so far by this RDataFrame instance.
2308 ///
2309 /// Example usage:
2310 /// ~~~{.cpp}
2311 /// ROOT::RDataFrame df(1);
2312 /// std::cout << df.GetNRuns() << std::endl; // prints "0"
2313 /// df.Sum("rdfentry_").GetValue(); // trigger the event loop
2314 /// std::cout << df.GetNRuns() << std::endl; // prints "1"
2315 /// df.Sum("rdfentry_").GetValue(); // trigger another event loop
2316 /// std::cout << df.GetNRuns() << std::endl; // prints "2"
2317 /// ~~~
2318 unsigned int GetNRuns() const { return fLoopManager->GetNRuns(); }
2319
2320 /// \brief Get descriptive information about the dataset.
2321 /// \return Info describing the dataset as a multi-line string
2322 ///
2323 /// The information returned by this convenience function is meant for interactive
2324 /// use. The exact string format should not be parsed automatically and can be subject to change.
2325 ///
2326 /// Example usage:
2327 /// ~~~{.cpp}
2328 /// ROOT::RDataFrame df("Events", "sample.root");
2329 /// std::cout << df.DescribeDataset() << std::endl;
2330 /// // prints "Dataframe from TTree Events in file sample.root"
2331 /// ~~~
2332 std::string DescribeDataset() const
2333 {
2334 // TTree/TChain as input
2335 const auto tree = fLoopManager->GetTree();
2336 if (tree) {
2337 const auto treeName = tree->GetName();
2338 const auto isTChain = dynamic_cast<TChain *>(tree) ? true : false;
2339 const auto treeType = isTChain ? "TChain" : "TTree";
2340 const auto isInMemory = !isTChain && !tree->GetCurrentFile() ? true : false;
2341 const auto friendInfo = ROOT::Internal::TreeUtils::GetFriendInfo(*tree);
2342 const auto hasFriends = friendInfo.fFriendNames.empty() ? false : true;
2343 std::stringstream ss;
2344 ss << "Dataframe from " << treeType << " " << treeName;
2345 if (isInMemory) {
2346 ss << " (in-memory)";
2347 } else {
2349 const auto numFiles = files.size();
2350 if (numFiles == 1) {
2351 ss << " in file " << files[0];
2352 } else {
2353 ss << " in files\n";
2354 for (auto i = 0u; i < numFiles; i++) {
2355 ss << " " << files[i];
2356 if (i < numFiles - 1)
2357 ss << '\n';
2358 }
2359 }
2360 }
2361 if (hasFriends) {
2362 const auto numFriends = friendInfo.fFriendNames.size();
2363 if (numFriends == 1) {
2364 ss << "\nwith friend\n";
2365 } else {
2366 ss << "\nwith friends\n";
2367 }
2368 for (auto i = 0u; i < numFriends; i++) {
2369 const auto nameAlias = friendInfo.fFriendNames[i];
2370 const auto files = friendInfo.fFriendFileNames[i];
2371 const auto numFiles = files.size();
2372 const auto subnames = friendInfo.fFriendChainSubNames[i];
2373 ss << " " << nameAlias.first;
2374 if (nameAlias.first != nameAlias.second)
2375 ss << " (" << nameAlias.second << ")";
2376 // case: TTree as friend
2377 if (numFiles == 1) {
2378 ss << " " << files[0];
2379 }
2380 // case: TChain as friend
2381 else {
2382 ss << '\n';
2383 for (auto j = 0u; j < numFiles; j++) {
2384 ss << " " << subnames[j] << " " << files[j];
2385 if (j < numFiles - 1)
2386 ss << '\n';
2387 }
2388 }
2389 if (i < numFriends - 1)
2390 ss << '\n';
2391 }
2392 }
2393 return ss.str();
2394 }
2395 // Datasource as input
2396 else if (fDataSource) {
2397 const auto datasourceLabel = fDataSource->GetLabel();
2398 return "Dataframe from datasource " + datasourceLabel;
2399 }
2400 // Trivial/empty datasource
2401 else {
2402 const auto n = fLoopManager->GetNEmptyEntries();
2403 if (n == 1) {
2404 return "Empty dataframe filling 1 row";
2405 } else {
2406 return "Empty dataframe filling " + std::to_string(n) + " rows";
2407 }
2408 }
2409 }
2410
2411 // clang-format off
2412 ////////////////////////////////////////////////////////////////////////////
2413 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2414 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2415 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2416 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2417 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2418 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2419 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2420 /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
2421 /// \return the result of the aggregation wrapped in a RResultPtr.
2422 ///
2423 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2424 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2425 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2426 /// the value of the column columnName.
2427 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2428 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2429 ///
2430 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2431 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2432 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2433 ///
2434 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2435 ///
2436 /// Example usage:
2437 /// ~~~{.cpp}
2438 /// auto aggregator = [](double acc, double x) { return acc * x; };
2439 /// ROOT::EnableImplicitMT();
2440 /// // If multithread is enabled, the aggregator function will be called by more threads
2441 /// // and will produce a vector of partial accumulators.
2442 /// // The merger function performs the final aggregation of these partial results.
2443 /// auto merger = [](std::vector<double> &accumulators) {
2444 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2445 /// accumulators[0] *= accumulators[i];
2446 /// }
2447 /// };
2448 ///
2449 /// // The accumulator is initialized at this value by every thread.
2450 /// double initValue = 1.;
2451 ///
2452 /// // Multiplies all elements of the column "x"
2453 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2454 /// ~~~
2455 // clang-format on
2456 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2457 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2458 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2459 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2460 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2461 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2462 {
2463 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2464 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2465
2466 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2467 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2468
2469 auto accObjPtr = std::make_shared<U>(aggIdentity);
2470 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2472 auto action = std::make_unique<Action_t>(
2473 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2475 fLoopManager->Book(action.get());
2476 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2477 }
2478
2479 // clang-format off
2480 ////////////////////////////////////////////////////////////////////////////
2481 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2482 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2483 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2484 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2485 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2486 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2487 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2488 /// \return the result of the aggregation wrapped in a RResultPtr.
2489 ///
2490 /// See previous Aggregate overload for more information.
2491 // clang-format on
2492 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2493 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2494 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2495 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2496 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2497 {
2498 static_assert(
2499 std::is_default_constructible<U>::value,
2500 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2501 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2502 }
2503
2504 // clang-format off
2505 ////////////////////////////////////////////////////////////////////////////
2506 /// \brief Book execution of a custom action using a user-defined helper object.
2507 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2508 /// \tparam OtherColumns A list of the types of the other columns used by this action
2509 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2510 /// \param[in] helper The Action Helper to be scheduled.
2511 /// \param[in] columns The names of the columns on which the helper acts.
2512 /// \return the result of the helper wrapped in a RResultPtr.
2513 ///
2514 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2515 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2516 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2517 ///
2518 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2519 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2520 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2521 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2522 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2523 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2524 /// the requested columns for the particular entry being processed.
2525 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2526 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2527 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2528 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
2529 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2530 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2531 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2532 /// method concurrently, but will always pass different 'slot' numbers.
2533 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2534 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2535 /// before Initialize(), because the RResultPtr is constructed before the event loop is started.
2536 ///
2537 /// In case this is called without specifying column types, jitting is used,
2538 /// and the Helper class needs to be known to the interpreter.
2539 ///
2540 /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2541 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2542 // clang-format on
2543
// Book: registers a user-provided action helper with the computation graph.
// NOTE(review): listing lines 2545 (the function signature) and 2549 (presumably the declaration of the
// `AH` alias used by the static_assert below) are absent from this extract. The cross-reference section lists
// the signature as
//   RResultPtr<typename std::decay_t<Helper>::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {}).
2544 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
2546 {
2547 using HelperT = std::decay_t<Helper>;
2548 // TODO add more static sanity checks on Helper
// Compile-time guard: the helper type must derive publicly from AH.
2550 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value,
2551 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2552
// Forward the helper into a shared_ptr and fetch the result object it exposes.
2553 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
2554 auto resPtr = hPtr->GetResultPtr();
2555
// No column type given and no column name given: try to book the helper with zero input columns.
// Otherwise forward the column names, and their count, to CreateAction.
2556 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
2557 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2558 } else {
2559 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2560 columns.size());
2561 }
2562 }
2563
2564 ////////////////////////////////////////////////////////////////////////////
2565 /// \brief Provides a representation of the columns in the dataset.
2566 /// \tparam ColumnTypes variadic list of branch/column types.
2567 /// \param[in] columnList Names of the columns to be displayed.
2568 /// \param[in] nRows Number of events for each column to be displayed.
2569 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2570 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2571 ///
2572 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2573 /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2574 /// complete version through `AsString()`.
2575 ///
2576 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
2577 /// RResultPtr.
2578 ///
2579 /// Example usage:
2580 /// ~~~{.cpp}
2581 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2582 /// auto d1 = rdf.Display("");
2583 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2584 /// auto d2 = rdf.Display({"x", "y"}, 128);
2585 /// // Printing the short representations, the event loop will run
2586 /// d1->Print();
2587 /// d2->Print();
2588 /// ~~~
2589 template <typename... ColumnTypes>
2590 RResultPtr<RDisplay>
2591 Display(const ColumnNames_t &columnList, int nRows = 5, size_t nMaxCollectionElements = 10)
2592 {
2593 CheckIMTDisabled("Display");
2594
2595 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows,
2596 nMaxCollectionElements);
2597 return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer, displayer);
2598 }
2599
2600 ////////////////////////////////////////////////////////////////////////////
2601 /// \brief Provides a representation of the columns in the dataset.
2602 /// \param[in] columnList Names of the columns to be displayed.
2603 /// \param[in] nRows Number of events for each column to be displayed.
2604 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2605 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2606 ///
2607 /// This overload automatically infers the column types.
2608 /// See the previous overloads for further details.
// Display overload that leaves column types to be inferred (RInferredType is passed to CreateAction).
// NOTE(review): listing line 2609 (the return type) is absent from this extract; the cross-reference
// section lists it as RResultPtr<RDisplay>.
2610 Display(const ColumnNames_t &columnList, int nRows = 5, size_t nMaxCollectionElements = 10)
2611 {
// Throws if implicit multi-threading is enabled (see CheckIMTDisabled).
2612 CheckIMTDisabled("Display");
2613 auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows,
2614 nMaxCollectionElements);
// The same RDisplay is used as result and as helper argument; the column count is passed explicitly
// because it cannot be deduced from the single RInferredType template argument.
2615 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer, displayer,
2616 columnList.size());
2617 }
2618
2619 ////////////////////////////////////////////////////////////////////////////
2620 /// \brief Provides a representation of the columns in the dataset.
2621 /// \param[in] columnNameRegexp A regular expression to select the columns.
2622 /// \param[in] nRows Number of events for each column to be displayed.
2623 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2624 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2625 ///
2626 /// The existing columns are matched against the regular expression. If the string provided
2627 /// is empty, all columns are selected.
2628 /// See the previous overloads for further details.
// Display overload selecting columns through a regular expression.
// NOTE(review): listing line 2629 (the return type) is absent from this extract; the cross-reference
// section lists it as RResultPtr<RDisplay>.
2630 Display(std::string_view columnNameRegexp = "", int nRows = 5, size_t nMaxCollectionElements = 10)
2631 {
// Resolve the regular expression against the currently available columns, then delegate to the
// overload that takes an explicit list of column names.
2632 const auto columnNames = GetColumnNames();
2633 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
2634 return Display(selectedColumns, nRows, nMaxCollectionElements);
2635 }
2636
2637 ////////////////////////////////////////////////////////////////////////////
2638 /// \brief Provides a representation of the columns in the dataset.
2639 /// \param[in] columnList Names of the columns to be displayed.
2640 /// \param[in] nRows Number of events for each column to be displayed.
/// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2641 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2642 ///
2643 /// See the previous overloads for further details.
2644 RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, int nRows = 5,
2645 size_t nMaxCollectionElements = 10)
2646 {
2647 ColumnNames_t selectedColumns(columnList);
2648 return Display(selectedColumns, nRows, nMaxCollectionElements);
2649 }
2650
2651private:
// Body of the private helper that registers the built-in "rdfentry_" and "rdfslot_" columns and their
// legacy "tdfentry_" / "tdfslot_" aliases.
// NOTE(review): listing lines 2652 (the signature), 2654 (presumably the declaration of `newCols`),
// 2664 and 2675 (the tails of the two make_shared calls) are absent from this extract.
2653 {
2655
2656 // Entry number column
2657 const std::string entryColName = "rdfentry_";
2658 const std::string entryColType = "ULong64_t";
// Generator receives (slot, entry) and returns the entry number unchanged.
2659 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
2660 using NewColEntry_t = RDFDetail::RDefine<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
2661
2662 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
2663 ColumnNames_t{}, fLoopManager->GetNSlots(), newCols,
2665 newCols.AddColumn(entryColumn, entryColName);
2666
2667 // Slot number column
2668 const std::string slotColName = "rdfslot_";
2669 const std::string slotColType = "unsigned int";
// Generator receives the slot index and returns it unchanged.
2670 auto slotColGen = [](unsigned int slot) { return slot; };
2671 using NewColSlot_t = RDFDetail::RDefine<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
2672
2673 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
2674 fLoopManager->GetNSlots(), newCols,
2676 newCols.AddColumn(slotColumn, slotColName);
2677
// Install the two new columns as this node's set of defines.
2678 fDefines = std::move(newCols);
2679
// Register the old "tdf"-prefixed names as aliases of the new columns and make them known to fDefines.
2680 fLoopManager->AddColumnAlias("tdfentry_", entryColName);
2681 fDefines.AddName("tdfentry_");
2682 fLoopManager->AddColumnAlias("tdfslot_", slotColName);
2683 fDefines.AddName("tdfslot_");
2684 }
2685
2686 std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
2687 {
2688 std::vector<std::string> types;
2689
2690 for (auto column : columnList) {
2691 types.push_back(GetColumnType(column));
2692 }
2693 return types;
2694 }
2695
// Body of CheckIMTDisabled: raises std::runtime_error naming the calling method.
// NOTE(review): listing lines 2696 (the signature; the cross-reference section lists it as
// `void CheckIMTDisabled(std::string_view callerName)`) and 2698 (the condition opening the block closed
// at listing line 2702, presumably a check that implicit multi-threading is enabled) are absent from this extract.
2697 {
// Error text is "<callerName> was called with ImplicitMT enabled, but multi-thread is not supported."
2699 std::string error(callerName);
2700 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
2701 throw std::runtime_error(error);
2702 }
2703 }
2704
2705 /// Create RAction object, return RResultPtr for the action
2706 /// Overload for the case in which all column types were specified (no jitting).
2707 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
2708 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
2709 /// for which the constructor arguments of the action helper are different from the returned value.
// CreateAction overload enabled when RNeedJitting<ColTypes...> is false.
// NOTE(review): listing lines 2713 (the return type; the cross-reference section lists it as
// RResultPtr<ActionResultType>) and 2720 (a statement following the column-name validation) are absent
// from this extract.
2710 template <typename ActionTag, typename... ColTypes, typename ActionResultType,
2711 typename HelperArgType = ActionResultType,
2712 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
2714 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
2715 const std::shared_ptr<HelperArgType> &helperArg, const int /*nColumns*/ = -1)
2716 {
// The runtime nColumns argument is ignored here: the column count is the size of the ColTypes pack.
2717 constexpr auto nColumns = sizeof...(ColTypes);
2718
2719 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2721
2722 const auto nSlots = fLoopManager->GetNSlots();
2723
// Build the concrete action, register it and its sample callback with the loop manager, then wrap the
// result `r` together with the action into the returned RResultPtr.
2724 auto action =
2725 RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, fProxiedPtr, ActionTag{}, fDefines);
2726 fLoopManager->Book(action.get());
2727 fLoopManager->AddSampleCallback(action->GetSampleCallback());
2728 return MakeResultPtr(r, *fLoopManager, std::move(action));
2729 }
2730
2731 /// Create RAction object, return RResultPtr for the action
2732 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
2733 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
2734 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
2735 template <typename ActionTag, typename... ColTypes, typename ActionResultType,
2736 typename HelperArgType = ActionResultType,
2737 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
2738 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
2739 const std::shared_ptr<HelperArgType> &helperArg, const int nColumns = -1)
2740 {
2741 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
2742
2743 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
2744 const unsigned int nSlots = fLoopManager->GetNSlots();
2745
2746 auto *tree = fLoopManager->GetTree();
2747 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
2748
2749 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2750 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
2751 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fDefines, fDataSource);
2752
2753 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(*fLoopManager);
2754 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
2755
2756 auto toJit = RDFInternal::JitBuildAction(
2757 validColumnNames, upcastNodeOnHeap, typeid(std::shared_ptr<HelperArgType>), typeid(ActionTag), helperArgOnHeap,
2758 tree, nSlots, fDefines, fDataSource, jittedActionOnHeap);
2759 fLoopManager->Book(jittedAction.get());
2760 fLoopManager->ToJitExec(toJit);
2761 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
2762 }
2763
// DefineImpl overload enabled when the callable's return type is default-constructible: builds an RDefine
// for `expression` and returns a new RInterface whose defines include it.
// NOTE(review): listing lines 2768, 2770-2772, 2774-2776 (the name checks in both branches of the
// Define/Redefine test), 2802 (tail of the make_shared call) and 2804 (presumably the declaration of
// `newCols`) are absent from this extract.
2764 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2765 std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
2766 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
2767 {
// `where` names the calling method; a "Redefine" prefix selects the else branch.
2769 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
2773 } else {
2777 }
2778
// Work out the column types the callable consumes: drop the leading slot argument for Slot defines and
// the leading (slot, entry) pair for SlotAndEntry defines.
2779 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2780 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2781 std::is_same<DefineType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
2782 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2783 std::is_same<DefineType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
2784
2785 constexpr auto nColumns = ColTypes_t::list_size;
2786
2787 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2788 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
2789
2790 // Declare return type to the interpreter, for future use by jitted actions
2791 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2792 if (retTypeName.empty()) {
2793 // The type is not known to the interpreter.
2794 // We must not error out here, but only if/when this column is used in jitted code
// Record a placeholder name built from the demangled type so the column can still be defined.
2795 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2796 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2797 }
2798
2799 using NewCol_t = RDFDetail::RDefine<F, DefineType>;
2800 auto newColumn =
2801 std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
2803
2805 newCols.AddColumn(newColumn, name);
2806
// The returned interface wraps the same node, loop manager and data source, with the extended defines.
2807 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, newCols, fDataSource);
2808
2809 return newInterface;
2810 }
2811
2812 // This overload is chosen when the callable passed to Define or DefineSlot returns void.
2813 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2814 // this way compilation of `Define` has no way to continue after throwing the error.
// DefineImpl overload selected when F is not convertible to std::string and its return type is not
// default-constructible; its only purpose is to trigger the static_assert below.
// NOTE(review): listing line 2819 (the function name and parameter list) is absent from this extract; the
// cross-reference section lists it as DefineImpl(std::string_view, F, const ColumnNames_t &).
2815 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2816 bool IsFStringConv = std::is_convertible<F, std::string>::value,
2817 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2818 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
2820 {
// Always false in this overload (enabled only when !IsRetTypeDefConstr), so instantiation fails with the
// message below.
2821 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2822 "Error in `Define`: type returned by expression is not default-constructible");
2823 return *this; // never reached
2824 }
2825
// SnapshotImpl: books a Snapshot action writing the selected columns and returns an RResultPtr to a new
// RDataFrame constructed from the output tree name and file name.
// NOTE(review): listing lines 2827 (return type, name and first parameters; the cross-reference section
// lists them as `RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName,
// std::string_view filename, ...`), 2834-2835 and 2844 are absent from this extract.
2826 template <typename... ColumnTypes>
2828 const ColumnNames_t &columnList, const RSnapshotOptions &options)
2829 {
2830 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2831
// The number of template column types is checked against the number of remaining column names.
2832 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
2833 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
2836
// Split the full tree path into its directory and tree-name components.
2837 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
2838 const auto &treename = parsedTreePath.fTreeName;
2839 const auto &dirname = parsedTreePath.fDirName;
2840
// Arguments for the snapshot helper; these differ from the value returned to the caller (newRDF below).
2841 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
2842 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
2843
2845 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
2846
2847 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs);
2848
// NOTE(review): for non-lazy snapshots the result is dereferenced right away; presumably this triggers
// the event loop so the output is written before returning. Confirm against RResultPtr.
2849 if (!options.fLazy)
2850 *resPtr;
2851 return resPtr;
2852 }
2853
2854 ////////////////////////////////////////////////////////////////////////////
2855 /// \brief Implementation of cache.
2856 template <typename... ColTypes, std::size_t... S>
2857 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
2858 {
2859 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2860
2861 // Check at compile time that the columns types are copy constructible
2862 constexpr bool areCopyConstructible =
2863 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
2864 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2865
2866 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
2867
2868 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
2869 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
2870 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
2871
2872 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
2873
2874 return cachedRDF;
2875 }
2876
// Overload that takes part in overload resolution only when `hPtr->Exec(0u)` is a valid expression, i.e.
// when the helper can be executed with a slot number and no column values (see the trailing return type).
// NOTE(review): listing line 2880 (the third parameter) is absent from this extract; the cross-reference
// section lists it as TTraits::TypeList<RDFDetail::RInferredType>.
2877 template <typename Helper, typename ActionResultType>
2878 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
2879 const std::shared_ptr<Helper> &hPtr,
2881 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
2882 {
// Book the helper with an empty column list and an explicit column count of zero.
2883 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, 0u);
2884 }
2885
2886 template <typename Helper, typename ActionResultType, typename... Others>
2887 RResultPtr<ActionResultType>
2888 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
2889 const std::shared_ptr<Helper>& /*hPtr*/,
2890 Others...)
2891 {
2892 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
2893 "columns! The action helper type was ") +
2894 typeid(Helper).name());
2895 return {};
2896 }
2897
2898protected:
// Member-wise constructor: stores the proxied node, the address of the loop manager, the data-source
// pointer and a copy of the booked defines. The body is intentionally empty.
2899 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, const RDFInternal::RBookedDefines &columns,
2900 RDataSource *ds)
2901 : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fDefines(columns)
2902 {
2903 }
2904
2906
2907 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2908
// Returns the validated list of column names for an operation that needs `nColumns` columns.
// NOTE(review): listing line 2911 (the single statement forming the body) is absent from this extract, so
// the validation itself is not visible here.
2909 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2910 {
2912 }
2913
// When a data source is attached, forwards the validated column names and their type list to
// RDFInternal::AddDSColumns; does nothing otherwise.
// NOTE(review): listing line 2915 (the signature) is absent from this extract; the cross-reference section
// lists it as `void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList<ColumnTypes...> typeList)`.
2914 template <typename... ColumnTypes>
2916 {
2917 if (fDataSource != nullptr)
2918 RDFInternal::AddDSColumns(validCols, *fLoopManager, *fDataSource, typeList, fDefines);
2919 }
2920};
2921
2922} // namespace RDF
2923
2924} // namespace ROOT
2925
2926#endif // ROOT_RDF_TINTERFACE
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
unsigned int UInt_t
Definition: RtypesCore.h:46
unsigned long long ULong64_t
Definition: RtypesCore.h:81
const Int_t kError
Definition: TError.h:46
XFontStruct * id
Definition: TGX11.cxx:109
char name[80]
Definition: TGX11.cxx:110
int type
Definition: TGX11.cxx:121
The head node of a RDF computation graph.
const std::map< std::string, std::string > & GetAliasMap() const
ULong64_t GetNEmptyEntries() const
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void AddSampleCallback(ROOT::RDF::SampleCallback_t &&callback)
unsigned int GetNRuns() const
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
void AddColumnAlias(const std::string &alias, const std::string &colName)
const std::map< std::string, std::vector< void * > > & GetDSValuePtrs() const
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void Book(RDFInternal::RActionBase *actionPtr)
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
Definition: RAction.hxx:52
Encapsulates the columns defined by the user.
bool HasName(std::string_view name) const
Check if the provided name is tracked in the names list.
void AddColumn(const std::shared_ptr< RDFDetail::RDefineBase > &column, std::string_view name)
Add a new booked column.
const RDefineBasePtrMap_t & GetColumns() const
Returns the list of the pointers to the defined columns.
ColumnNames_t GetNames() const
Returns the list of the names of the defined columns.
void AddName(std::string_view name)
Add a new name to the list returned by GetNames without booking a new column.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
virtual std::string GetLabel()
Return a string representation of the datasource type.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:97
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RBookedDefines &columns, RDataSource *ds)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:750
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RLoopManager * GetLoopManager() const
std::string DescribeDataset() const
Get descriptive information about the dataset.
RResultPtr<::TGraph > Graph(std::string_view v1Name="", std::string_view v2Name="")
Fill and return a graph (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
Definition: RInterface.hxx:328
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterface(const std::shared_ptr< Proxied > &proxied)
Only enabled when building a RInterface<RLoopManager>.
Definition: RInterface.hxx:133
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
Definition: RInterface.hxx:514
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int nColumns=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which one or more co...
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:841
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
Definition: RInterface.hxx:299
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:467
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:112
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
std::string Describe()
Return information about the dataframe.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which all column typ...
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:234
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
Definition: RInterface.hxx:574
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:785
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
Definition: RInterface.hxx:610
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
Definition: RInterface.hxx:829
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:409
RDFInternal::RBookedDefines fDefines
Contains the columns defined up to this node.
Definition: RInterface.hxx:115
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
Definition: RInterface.hxx:890
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RLoopManager * fLoopManager
Definition: RInterface.hxx:110
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
Definition: RInterface.hxx:103
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
Definition: RInterface.hxx:376
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:108
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:447
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
Definition: RInterface.hxx:961
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
Definition: RInterface.hxx:915
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:692
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:428
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:710
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:358
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
Definition: RInterface.hxx:981
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:254
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:194
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNRuns() const
Gets the number of event loops run.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:218
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
Definition: RInterface.hxx:939
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:95
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
Definition: RSampleInfo.hxx:32
ROOT's RDataFrame offers a high level interface for analyses of data stored in TTree,...
Definition: RDataFrame.hxx:40
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
TDirectory::TContext keeps track of and restores the current directory.
Definition: TDirectory.h:89
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
RFriendInfo GetFriendInfo(const TTree &tree)
Get and store the names, aliases and file names of the direct friends of the tree.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
Get and store the file names associated with the input tree.
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
#define F(x, y, z)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create a RResultPtr and set its pointer to the corresponding RAction. This overload is invoked by non-...
Definition: RResultPtr.hxx:418
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branch names, including the ones of the friend trees.
void CheckValidCppVarName(std::string_view var, const std::string &where)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:224
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: RDFUtils.cxx:99
std::string ResolveAlias(const std::string &col, const std::map< std::string, std::string > &aliasMap)
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition: RDFUtils.cxx:358
std::string PrettyPrintAddr(const void *const addr)
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branch names, including the ones of the friend trees.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RBookedDefines &customCols, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RBookedDefines &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const ColumnNames_t &validDefines, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RBookedDefines &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition: RDFUtils.cxx:349
void BookFilterJit(const std::shared_ptr< RJittedFilter > &jittedFilter, std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &branches, const RBookedDefines &customCols, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RBookedDefines &defines, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const ColumnNames_t &customCols, const std::map< std::string, std::string > &aliasMap, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
double T(double x)
Definition: ChebyshevPol.h:34
std::vector< std::string > ColumnNames_t
Definition: Utils.hxx:35
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:527
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:558
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:544
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1195
RooArgSet S(Args_t &&... args)
Definition: RooArgSet.h:215
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
static constexpr double s
Definition: graph.py:1
Definition: tree.py:1
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition: Utils.hxx:139
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:27
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:45
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:70
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a TProfile.
Definition: HistoModels.hxx:99
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25