Logo ROOT  
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
19#include "ROOT/RDF/RDefine.hxx"
21#include "ROOT/RDF/RFilter.hxx"
26#include "ROOT/RDF/RRange.hxx"
27#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/RResultPtr.hxx"
32#include "ROOT/RStringView.hxx"
33#include "ROOT/RVec.hxx"
34#include "ROOT/TypeTraits.hxx"
35#include "RtypesCore.h" // for ULong64_t
36#include "TDirectory.h"
37#include "TH1.h" // For Histo actions
38#include "TH2.h" // For Histo actions
39#include "TH3.h" // For Histo actions
40#include "THn.h"
41#include "TProfile.h"
42#include "TProfile2D.h"
43#include "TStatistic.h"
44
45#include <algorithm>
46#include <cstddef>
47#include <initializer_list>
48#include <iterator> // std::back_inserter
49#include <limits>
50#include <memory>
51#include <set>
52#include <sstream>
53#include <stdexcept>
54#include <string>
55#include <type_traits> // is_same, enable_if
56#include <typeinfo>
57#include <unordered_set>
58#include <utility> // std::index_sequence
59#include <vector>
60
61class TGraph;
62
63// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
64namespace ROOT {
67void EnableImplicitMT(UInt_t numthreads);
68class RDataFrame;
69namespace Internal {
70namespace RDF {
72}
73} // namespace Internal
74} // namespace ROOT
75namespace cling {
76std::string printValue(ROOT::RDataFrame *tdf);
77}
78
79namespace ROOT {
80namespace RDF {
83namespace TTraits = ROOT::TypeTraits;
84
85template <typename Proxied, typename DataSource>
86class RInterface;
87
88using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
89
90// clang-format off
91/**
92 * \class ROOT::RDF::RInterface
93 * \ingroup dataframe
94 * \brief The public interface to the RDataFrame federation of classes.
95 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
96 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
97 *
98 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
99 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
100 */
101// clang-format on
102template <typename Proxied, typename DataSource = void>
104 using DS_t = DataSource;
108 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
110
111 template <typename T, typename W>
112 friend class RInterface;
113
114 friend void RDFInternal::TriggerRun(RNode &node);
115
116 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
117
118public:
119 ////////////////////////////////////////////////////////////////////////////
120 /// \brief Copy-assignment operator for RInterface.
121 RInterface &operator=(const RInterface &) = default;
122
123 ////////////////////////////////////////////////////////////////////////////
124 /// \brief Copy-ctor for RInterface.
125 RInterface(const RInterface &) = default;
126
127 ////////////////////////////////////////////////////////////////////////////
128 /// \brief Move-ctor for RInterface.
129 RInterface(RInterface &&) = default;
130
131 ////////////////////////////////////////////////////////////////////////////
132 /// \brief Move-assignment operator for RInterface.
134
135 ////////////////////////////////////////////////////////////////////////////
136 /// \brief Build a RInterface from a RLoopManager.
137 /// This constructor is only available for RInterface<RLoopManager>.
139 RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied)
140 {
141 }
142
143 ////////////////////////////////////////////////////////////////////////////
144 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
145 /// Different RDataFrame methods return different C++ types. All nodes, however,
146 /// can be cast to this common type at the cost of a small performance penalty.
147 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
148 /// around via (non-template, C++11) helper functions.
149 /// Example usage:
150 /// ~~~{.cpp}
151 /// // a function that conditionally adds a Range to a RDataFrame node.
152 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
153 /// {
154 /// return mustAddRange ? df.Range(1) : df;
155 /// }
156 /// // use as :
157 /// ROOT::RDataFrame df(10);
158 /// auto maybeRanged = MaybeAddRange(df, true);
159 /// ~~~
160 /// Note that it is not a problem to pass RNode's by value.
161 operator RNode() const
162 {
163 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister);
164 }
165
166 ////////////////////////////////////////////////////////////////////////////
167 /// \brief Append a filter to the call graph.
168 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
169 /// signalling whether the event has passed the selection (true) or not (false).
170 /// \param[in] columns Names of the columns/branches in input to the filter function.
171 /// \param[in] name Optional name of this filter. See `Report`.
172 /// \return the filter node of the computation graph.
173 ///
174 /// Append a filter node at the point of the call graph corresponding to the
175 /// object this method is called on.
176 /// The callable `f` should not have side-effects (e.g. modification of an
177 /// external or static variable) to ensure correct results when implicit
178 /// multi-threading is active.
179 ///
180 /// RDataFrame only evaluates filters when necessary: if multiple filters
181 /// are chained one after another, they are executed in order and the first
182 /// one returning false causes the event to be discarded.
183 /// Even if multiple actions or transformations depend on the same filter,
184 /// it is executed once per entry. If its result is requested more than
185 /// once, the cached result is served.
186 ///
187 /// ### Example usage:
188 /// ~~~{.cpp}
189 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
190 /// auto filtered = df.Filter(myCut, {"x", "y"});
191 ///
192 /// // String: it must contain valid C++ except that column names can be used instead of variable names
193 /// auto filtered = df.Filter("x*y > 0");
194 /// ~~~
195 ///
196 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
197 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
198 /// ~~~{.cpp}
199 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
200 /// ~~~
201 /// but instead this will:
202 /// ~~~{.cpp}
203 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
204 /// ~~~
207 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
208 {
209 RDFInternal::CheckFilter(f);
210 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
211 constexpr auto nColumns = ColTypes_t::list_size;
212 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
213 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
214
216
217 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
218 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
219 }
220
221 ////////////////////////////////////////////////////////////////////////////
222 /// \brief Append a filter to the call graph.
223 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
224 /// signalling whether the event has passed the selection (true) or not (false).
225 /// \param[in] name Optional name of this filter. See `Report`.
226 /// \return the filter node of the computation graph.
227 ///
228 /// Refer to the first overload of this method for the full documentation.
231 {
232 // The sfinae is there in order to pick up the overloaded method which accepts two strings
233 // rather than this template method.
234 return Filter(f, {}, name);
235 }
236
237 ////////////////////////////////////////////////////////////////////////////
238 /// \brief Append a filter to the call graph.
239 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
240 /// signalling whether the event has passed the selection (true) or not (false).
241 /// \param[in] columns Names of the columns/branches in input to the filter function.
242 /// \return the filter node of the computation graph.
243 ///
244 /// Refer to the first overload of this method for the full documentation.
245 template <typename F>
246 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
247 {
248 return Filter(f, ColumnNames_t{columns});
249 }
250
251 ////////////////////////////////////////////////////////////////////////////
252 /// \brief Append a filter to the call graph.
253 /// \param[in] expression The filter expression in C++
254 /// \param[in] name Optional name of this filter. See `Report`.
255 /// \return the filter node of the computation graph.
256 ///
257 /// The expression is just-in-time compiled and used to filter entries. It must
258 /// be valid C++ syntax in which variable names are substituted with the names
259 /// of branches/columns.
260 ///
261 /// ### Example usage:
262 /// ~~~{.cpp}
263 /// auto filtered_df = df.Filter("myCollection.size() > 3");
264 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
265 /// ~~~
266 ///
267 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
268 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
269 /// ~~~{.cpp}
270 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
271 /// ~~~
272 /// but instead this will:
273 /// ~~~{.cpp}
274 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
275 /// ~~~
277 {
278 // deleted by the jitted call to JitFilterHelper
279 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
280 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
281 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister);
282 const auto jittedFilter =
285
287 }
288
289 // clang-format off
290 ////////////////////////////////////////////////////////////////////////////
291 /// \brief Define a new column.
292 /// \param[in] name The name of the defined column.
293 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
294 /// \param[in] columns Names of the columns/branches in input to the producer function.
295 /// \return the first node of the computation graph for which the new quantity is defined.
296 ///
297 /// Define a column that will be visible from all subsequent nodes
298 /// of the functional chain. The `expression` is only evaluated for entries that pass
299 /// all the preceding filters.
300 /// A new variable is created called `name`, accessible as if it was contained
301 /// in the dataset from subsequent transformations/actions.
302 ///
303 /// Use cases include:
304 /// * caching the results of complex calculations for easy and efficient multiple access
305 /// * extraction of quantities of interest from complex objects
306 ///
307 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
308 ///
309 /// ### Example usage:
310 /// ~~~{.cpp}
311 /// // assuming a function with signature:
312 /// double myComplexCalculation(const RVec<float> &muon_pts);
313 /// // we can pass it directly to Define
314 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
315 /// // alternatively, we can pass the body of the function as a string, as in Filter:
316 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
317 /// ~~~
318 ///
319 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
320 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
321 /// ~~~{.cpp}
322 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
323 /// ~~~
324 /// but instead this will:
325 /// ~~~{.cpp}
326 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
327 /// ~~~
330 {
331 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define");
332 }
333 // clang-format on
334
335 // clang-format off
336 ////////////////////////////////////////////////////////////////////////////
337 /// \brief Define a new column with a value dependent on the processing slot.
338 /// \param[in] name The name of the defined column.
339 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
340 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
341 /// \return the first node of the computation graph for which the new quantity is defined.
342 ///
343 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
344 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
345 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
346 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
347 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
348 ///
349 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
350 /// ~~~{.cpp}
351 /// int function(unsigned int, double, double);
352 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
353 /// df.DefineSlot("x", function, {"column1", "column2"})
354 /// ~~~
355 ///
356 /// See Define() for more information.
357 template <typename F>
359 {
360 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot");
361 }
362 // clang-format on
363
364 // clang-format off
365 ////////////////////////////////////////////////////////////////////////////
366 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
367 /// \param[in] name The name of the defined column.
368 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
369 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
370 /// \return the first node of the computation graph for which the new quantity is defined.
371 ///
372 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
373 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
374 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
375 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
376 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
377 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
378 ///
379 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
380 /// ~~~{.cpp}
381 /// int function(unsigned int, ULong64_t, double, double);
382 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
383 /// DefineSlotEntry("x", function, {"column1", "column2"})
384 /// ~~~
385 ///
386 /// See Define() for more information.
387 template <typename F>
389 {
390 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
391 "DefineSlotEntry");
392 }
393 // clang-format on
394
395 ////////////////////////////////////////////////////////////////////////////
396 /// \brief Define a new column.
397 /// \param[in] name The name of the defined column.
398 /// \param[in] expression An expression in C++ which represents the defined value
399 /// \return the first node of the computation graph for which the new quantity is defined.
400 ///
401 /// The expression is just-in-time compiled and used to produce the column entries.
402 /// It must be valid C++ syntax in which variable names are substituted with the names
403 /// of branches/columns.
404 ///
405 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
406 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
407 /// ~~~{.cpp}
408 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
409 /// ~~~
410 /// but instead this will:
411 /// ~~~{.cpp}
412 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
413 /// ~~~
414 ///
415 /// Refer to the first overload of this method for the full documentation.
417 {
418 constexpr auto where = "Define";
420 // these checks must be done before jitting lest we throw exceptions in jitted code
423
424 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
425 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
426 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
427
429 newCols.AddDefine(std::move(jittedDefine));
430
431 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
432
433 return newInterface;
434 }
435
436 ////////////////////////////////////////////////////////////////////////////
437 /// \brief Overwrite the value and/or type of an existing column.
438 /// \param[in] name The name of the column to redefine.
439 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
440 /// \param[in] columns Names of the columns/branches in input to the expression.
441 /// \return the first node of the computation graph for which the quantity is redefined.
442 ///
443 /// The old value of the column can be used as an input for the expression.
444 ///
445 /// An exception is thrown in case the column to redefine does not already exist.
446 /// See Define() for more information.
449 {
450 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine");
451 }
452
453 // clang-format off
454 ////////////////////////////////////////////////////////////////////////////
455 /// \brief Overwrite the value and/or type of an existing column.
456 /// \param[in] name The name of the column to redefine.
457 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
458 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
459 /// \return the first node of the computation graph for which the new quantity is defined.
460 ///
461 /// The old value of the column can be used as an input for the expression.
462 /// An exception is thrown in case the column to redefine does not already exist.
463 ///
464 /// See DefineSlot() for more information.
465 // clang-format on
466 template <typename F>
468 {
469 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot");
470 }
471
472 // clang-format off
473 ////////////////////////////////////////////////////////////////////////////
474 /// \brief Overwrite the value and/or type of an existing column.
475 /// \param[in] name The name of the column to redefine.
476 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
477 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
478 /// \return the first node of the computation graph for which the new quantity is defined.
479 ///
480 /// The old value of the column can be used as an input for the expression.
481 /// An exception is thrown in case the column to re-define does not already exist.
482 ///
483 /// See DefineSlotEntry() for more information.
484 // clang-format on
485 template <typename F>
487 {
488 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
489 "RedefineSlotEntry");
490 }
491
492 ////////////////////////////////////////////////////////////////////////////
493 /// \brief Overwrite the value and/or type of an existing column.
494 /// \param[in] name The name of the column to redefine.
495 /// \param[in] expression An expression in C++ which represents the defined value
496 /// \return the first node of the computation graph for which the new quantity is defined.
497 ///
498 /// The expression is just-in-time compiled and used to produce the column entries.
499 /// It must be valid C++ syntax in which variable names are substituted with the names
500 /// of branches/columns.
501 ///
502 /// The old value of the column can be used as an input for the expression.
503 /// An exception is thrown in case the column to re-define does not already exist.
504 ///
505 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
507 {
508 constexpr auto where = "Redefine";
513
514 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
515 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
516 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
517
519 newCols.AddDefine(std::move(jittedDefine));
520
521 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
522
523 return newInterface;
524 }
525
526 // clang-format off
527 ////////////////////////////////////////////////////////////////////////////
528 /// \brief Define a new column that is updated when the input sample changes.
529 /// \param[in] name The name of the defined column.
530 /// \param[in] expression A C++ callable that computes the new value of the defined column.
531 /// \return the first node of the computation graph for which the new quantity is defined.
532 ///
533 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
534 /// where:
535 /// - `T` is the type of the defined column
536 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
537 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
538 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
539 /// being processed (see the class docs for more information).
540 ///
541 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
542 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
543 /// starts rather than at every entry.
544 ///
545 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
546 ///
547 /// ### Example usage:
548 /// ~~~{.cpp}
549 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
550 /// df.DefinePerSample("weightbysample",
551 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
552 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
553 /// ~~~
554 // clang-format on
555 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
556 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
558 {
559 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
562
563 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
564 if (retTypeName.empty()) {
565 // The type is not known to the interpreter.
566 // We must not error out here, but only if/when this column is used in jitted code: until then, record a placeholder type name.
567 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
568 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
569 }
570
571 auto newColumn =
572 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
573
575 newCols.AddDefine(std::move(newColumn));
576 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
577 return newInterface;
578 }
579
580 // clang-format off
581 ////////////////////////////////////////////////////////////////////////////
582 /// \brief Define a new column that is updated when the input sample changes.
583 /// \param[in] name The name of the defined column.
584 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
585 /// \return the first node of the computation graph for which the new quantity is defined.
586 ///
587 /// The expression is just-in-time compiled and used to produce the column entries.
588 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
589 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
590 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
591 ///
592 /// ### Example usage:
593 /// ~~~{.py}
594 /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
595 /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
596 /// ~~~
597 ///
598 /// \note
599 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
600 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
601 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
602 /// ~~~{.py}
603 /// ROOT.gInterpreter.Declare(
604 /// """
605 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
606 /// return id.Contains("sample1") ? 1.0f : 2.0f;
607 /// }
608 /// """)
609 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
610 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
611 /// ~~~
612 ///
613 /// \note
614 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
615 /// column names other than those mentioned above: the expression is evaluated once before the processing of the
616 /// sample even starts, so column values are not accessible.
617 // clang-format on
619 {
620 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
621 // these checks must be done before jitting lest we throw exceptions in jitted code
624
625 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
626 auto jittedDefine =
627 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap);
628
630 newCols.AddDefine(std::move(jittedDefine));
631
632 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
633
634 return newInterface;
635 }
636
637 /// \brief Register systematic variations for an existing column.
638 /// \param[in] colName name of the column for which varied values are provided.
639 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
640 /// take any column values as input, similarly to what happens with Filter and Define calls. It must
641 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
642 /// \param[in] inputColumns the names of the columns to be passed to the callable.
643 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
644 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
645 ///
646 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
647 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
648 /// results that depend on any varied quantity a map/dictionary of varied results can be produced with
649 /// ROOT::RDF::Experimental::VariationsFor (see the example below).
650 ///
651 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
652 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
653 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
654 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
655 ///
656 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
657 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
658 /// ~~~{.cpp}
659 /// auto nominal_hx =
660 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, {"down", "up"})
661 /// .Filter("pt > k")
662 /// .Define("x", someFunc, {"pt"})
663 /// .Histo1D("x");
664 ///
665 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
666 /// hx["nominal"].Draw();
667 /// hx["pt:down"].Draw("SAME");
668 /// ~~~
669 template <typename F>
670 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
671 const std::vector<std::string> &variationTags, std::string_view variationName = "")
672 {
673 std::vector<std::string> colNames{{std::string(colName)}};
674 const std::string theVariationName{variationName.empty() ? colName : variationName};
675
676 return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags,
677 theVariationName);
678 }
679
680 /// \brief Register systematic variations for an existing column using auto-generated variation tags.
681 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
682 /// will be auto-generated as the sequence 0...nVariations-1.
683 /// See the documentation of the previous overload for more information.
684 template <typename F>
685 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
686 std::size_t nVariations, std::string_view variationName = "")
687 {
688 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
689
690 std::vector<std::string> variationTags;
691 variationTags.reserve(nVariations);
692 for (std::size_t i = 0u; i < nVariations; ++i)
693 variationTags.emplace_back(std::to_string(i));
694
695 const std::string theVariationName{variationName.empty() ? colName : variationName};
696
697 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
698 }
699
700 /// \brief Register a systematic variation that affects multiple columns simultaneously.
701 /// This overload of Vary takes a list of column names as first argument rather than a single name and
702 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
703 /// affected column.
704 /// See the documentation of the first Vary overload for more information.
705 ///
706 /// Example usage:
707 /// ~~~{.cpp}
708 /// // produce variations "ptAndEta:down" and "ptAndEta:up"
709 /// df.Vary({"pt", "eta"},
710 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
711 /// {"down", "up"},
712 /// "ptAndEta");
713 /// ~~~
714 template <typename F>
716 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
717 const std::vector<std::string> &variationTags, std::string_view variationName)
718 {
719 return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName);
720 }
721
722 /// \brief Register systematic variations for one or more existing columns using auto-generated tags.
723 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
724 /// will be auto-generated as the sequence 0...nVariations-1.
725 /// See the documentation of the previous overload for more information.
726 template <typename F>
728 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
729 std::size_t nVariations, std::string_view variationName)
730 {
731 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
732
733 std::vector<std::string> variationTags;
734 variationTags.reserve(nVariations);
735 for (std::size_t i = 0u; i < nVariations; ++i)
736 variationTags.emplace_back(std::to_string(i));
737
738 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
739 }
740
741 /// \brief Register systematic variations for an existing column.
742 /// \param[in] colName name of the column for which varied values are provided.
743 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
744 /// values for the specified column.
745 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
746 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
747 /// colName is used if none is provided.
748 ///
749 /// ~~~{.cpp}
750 /// auto nominal_hx =
751 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
752 /// .Filter("pt > k")
753 /// .Define("x", someFunc, {"pt"})
754 /// .Histo1D("x");
755 ///
756 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
757 /// hx["nominal"].Draw();
758 /// hx["pt:down"].Draw("SAME");
759 /// ~~~
761 const std::vector<std::string> &variationTags, std::string_view variationName = "")
762 {
763 std::vector<std::string> colNames{{std::string(colName)}};
764 const std::string theVariationName{variationName.empty() ? colName : variationName};
765
766 return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true);
767 }
768
769 /// \brief Register systematic variations for an existing column.
770 /// \param[in] colName name of the column for which varied values are provided.
771 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
772 /// values for the specified column.
773 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
774 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
775 /// colName is used if none is provided.
776 ///
777 /// See the documentation for the previous overload for more information.
778 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
779 std::string_view variationName = "")
780 {
781 std::vector<std::string> variationTags;
782 variationTags.reserve(nVariations);
783 for (std::size_t i = 0u; i < nVariations; ++i)
784 variationTags.emplace_back(std::to_string(i));
785
786 return Vary(colName, expression, std::move(variationTags), variationName);
787 }
788
789 /// \brief Register systematic variations for one or more existing columns.
790 /// \param[in] colNames names of the columns for which varied values are provided.
791 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
792 /// values for the specified columns.
793 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
794 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
795 ///
796 /// ~~~{.cpp}
797 /// auto nominal_hx =
798 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
799 /// .Histo1D("x", "y");
800 ///
801 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
802 /// hx["nominal"].Draw();
803 /// hx["xy:0"].Draw("SAME");
804 /// hx["xy:1"].Draw("SAME");
805 /// ~~~
806 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
807 std::size_t nVariations, std::string_view variationName)
808 {
809 std::vector<std::string> variationTags;
810 variationTags.reserve(nVariations);
811 for (std::size_t i = 0u; i < nVariations; ++i)
812 variationTags.emplace_back(std::to_string(i));
813
814 return Vary(colNames, expression, std::move(variationTags), variationName);
815 }
816
817 /// \brief Register systematic variations for one or more existing columns.
818 /// \param[in] colNames names of the columns for which varied values are provided.
819 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
820 /// values for the specified columns.
821 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
822 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
823 ///
824 /// ~~~{.cpp}
825 /// auto nominal_hx =
826 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
827 /// .Histo1D("x", "y");
828 ///
829 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
830 /// hx["nominal"].Draw();
831 /// hx["xy:down"].Draw("SAME");
832 /// hx["xy:up"].Draw("SAME");
833 /// ~~~
834 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
835 const std::vector<std::string> &variationTags, std::string_view variationName)
836 {
837 return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false);
838 }
839
840 ////////////////////////////////////////////////////////////////////////////
841 /// \brief Allow to refer to a column with a different name.
842 /// \param[in] alias name of the column alias
843 /// \param[in] columnName of the column to be aliased
844 /// \return the first node of the computation graph for which the alias is available.
845 ///
846 /// Aliasing an alias is supported.
847 ///
848 /// ### Example usage:
849 /// ~~~{.cpp}
850 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
851 /// ~~~
853 {
854 // The symmetry with Define is clear. We want to:
855 // - Create globally the alias and return this very node, unchanged
856 // - Make aliases accessible based on chains and not globally
857
858 // Column names exposed by the data source, if there is one (an empty list otherwise)
859 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
860
861 constexpr auto where = "Alias"; // method name
863 // If the alias name is a column name, there is a problem
865
866 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0]; // exactly one column is requested: keep its single validated name
867
869 newCols.AddAlias(alias, validColumnName); // record `alias` as another name for the validated column
870
871 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); // same node (fProxiedPtr), now paired with the columns that include the alias
872
873 return newInterface;
874 }
875
876 ////////////////////////////////////////////////////////////////////////////
877 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
878 /// \tparam ColumnTypes variadic list of branch/column types.
879 /// \param[in] treename The name of the output TTree.
880 /// \param[in] filename The name of the output TFile.
881 /// \param[in] columnList The list of names of the columns/branches to be written.
882 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
883 /// \return a `RDataFrame` that wraps the snapshotted dataset.
884 ///
885 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
886 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
887 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
888 /// written out and it appears before the array in the columnList.
889 ///
890 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
891 /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
892 /// present, by default all friend top-level branches that have names that do not collide with
893 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
894 /// friend branches with the same names of branches in the main TTree/TChain with names of the form
895 /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
896 ///
897 /// ### Writing to a sub-directory
898 ///
899 /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
900 /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree `t` in the
901 /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
902 ///
903 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
904 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
905 /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
906 /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
907 /// error out if such a "shuffled" TTree is used in a friendship.
908 ///
909 /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
910 /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
911 /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
912 ///
913 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
914 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
915 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
916 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
917 ///
918 /// ### Example invocations:
919 ///
920 /// ~~~{.cpp}
921 /// // without specifying template parameters (column types automatically deduced)
922 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
923 ///
924 /// // specifying template parameters ("x" is `int`, "y" is `float`)
925 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
926 /// ~~~
927 ///
928 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
929 /// `RSnapshotOptions`:
930 /// ~~~{.cpp}
931 /// RSnapshotOptions opts;
932 /// opts.fLazy = true;
933 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
934 /// ~~~
935 template <typename... ColumnTypes>
938 const RSnapshotOptions &options = RSnapshotOptions())
939 { // the column types are spelled out as template arguments: forward everything unchanged to SnapshotImpl
940 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
941 }
942
943 ////////////////////////////////////////////////////////////////////////////
944 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
945 /// \param[in] treename The name of the output TTree.
946 /// \param[in] filename The name of the output TFile.
947 /// \param[in] columnList The list of names of the columns/branches to be written.
948 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
949 /// \return a `RDataFrame` that wraps the snapshotted dataset.
950 ///
951 /// This function returns a `RDataFrame` built with the output tree as a source.
952 /// The types of the columns are automatically inferred and do not need to be specified.
953 ///
954 /// See above for a more complete description and example usages.
956 const ColumnNames_t &columnList,
957 const RSnapshotOptions &options = RSnapshotOptions())
958 {
959 // like columnList but with `#var` columns removed
960 auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
961 // like colListNoPoundSizes but with aliases resolved
962 auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes);
964 // like the two lists above, but with missing size branches required by array branches added in the right
964 // positions: `.first` is the alias-resolved list, `.second` the list with the names as requested by the user
965 const auto pairOfColumnLists =
967 std::move(colListNoAliases), std::move(colListNoPoundSizes));
968 const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
969 const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
970
971
972 const auto fullTreeName = treename; // keep the full name: `treename` is overwritten below with the parsed tree name
973 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
974 treename = parsedTreePath.fTreeName;
975 const auto &dirname = parsedTreePath.fDirName;
976
977 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(
978 RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename),
979 colListWithAliasesAndSizeBranches, options});
980
982 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, colListNoAliasesWithSizeBranches); // the dataframe handed back to the caller, built from the full tree name and the output file
983
984 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
985 colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr,
986 colListNoAliasesWithSizeBranches.size());
987
988 if (!options.fLazy) // unless a lazy Snapshot was requested...
989 *resPtr; // ...dereference the result right away, which triggers the event loop
990 return resPtr;
991 }
992
993 // clang-format off
994 ////////////////////////////////////////////////////////////////////////////
995 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
996 /// \param[in] treename The name of the output TTree.
997 /// \param[in] filename The name of the output TFile.
998 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
999 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1000 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1001 ///
1002 /// This function returns a `RDataFrame` built with the output tree as a source.
1003 /// The types of the columns are automatically inferred and do not need to be specified.
1004 ///
1005 /// See above for a more complete description and example usages.
1007 std::string_view columnNameRegexp = "",
1008 const RSnapshotOptions &options = RSnapshotOptions())
1009 {
1010 const auto definedColumns = fColRegister.GetNames();
1011 auto *tree = fLoopManager->GetTree();
1012 const auto treeBranchNames = tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1013 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1014 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1014 // (13 is the length of the "R_rdf_sizeof_" prefix)
1015 ColumnNames_t dsColumnsWithoutSizeColumns;
1016 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1017 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1018 ColumnNames_t columnNames; // candidate columns, in order: defined columns, top-level tree branches, data-source columns
1019 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1020 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1021 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1022 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1023
1024 // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1025 // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1026 RDFInternal::RemoveDuplicates(columnNames);
1027
1028 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot"); // keep the candidates matching the regexp
1029 return Snapshot(treename, filename, selectedColumns, options); // defer to the overload taking an explicit column list
1030 }
1031 // clang-format on
1032
1033 // clang-format off
1034 ////////////////////////////////////////////////////////////////////////////
1035 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1036 /// \param[in] treename The name of the output TTree.
1037 /// \param[in] filename The name of the output TFile.
1038 /// \param[in] columnList The list of names of the columns/branches to be written.
1039 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1040 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1041 ///
1042 /// This function returns a `RDataFrame` built with the output tree as a source.
1043 /// The types of the columns are automatically inferred and do not need to be specified.
1044 ///
1045 /// See above for a more complete description and example usages.
1047 std::initializer_list<std::string> columnList,
1048 const RSnapshotOptions &options = RSnapshotOptions())
1049 {
1050 ColumnNames_t selectedColumns(columnList); // copy the braced list into a ColumnNames_t
1051 return Snapshot(treename, filename, selectedColumns, options); // defer to the overload taking a ColumnNames_t
1052 }
1053 // clang-format on
1054
1055 ////////////////////////////////////////////////////////////////////////////
1056 /// \brief Save selected columns in memory.
1057 /// \tparam ColumnTypes variadic list of branch/column types.
1058 /// \param[in] columnList columns to be cached in memory.
1059 /// \return a `RDataFrame` that wraps the cached dataset.
1060 ///
1061 /// This action returns a new `RDataFrame` object, completely detached from
1062 /// the originating `RDataFrame`. The new dataframe only contains the cached
1063 /// columns and stores their content in memory for fast, zero-copy subsequent access.
1064 ///
1065 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1066 /// fits in memory and that will be accessed many times.
1067 ///
1068 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1069 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1070 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1071 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1072 ///
1073 /// ### Example usage:
1074 ///
1075 /// **Types and columns specified:**
1076 /// ~~~{.cpp}
1077 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1078 /// ~~~
1079 ///
1080 /// **Types inferred and columns specified (this invocation relies on jitting):**
1081 /// ~~~{.cpp}
1082 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1083 /// ~~~
1084 ///
1085 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1086 /// ~~~{.cpp}
1087 /// auto cache_all_cols_df = df.Cache(myRegexp);
1088 /// ~~~
1089 template <typename... ColumnTypes>
1091 {
1092 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>(); // compile-time indices 0..N-1, one per column type
1093 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1094 }
1095
1096 ////////////////////////////////////////////////////////////////////////////
1097 /// \brief Save selected columns in memory.
1098 /// \param[in] columnList columns to be cached in memory
1099 /// \return a `RDataFrame` that wraps the cached dataset.
1100 ///
1101 /// See the previous overloads for more information.
1103 {
1104 // Early return: if the list of columns is empty, just return an RDF with no columns, built from the
1104 // current number of entries.
1105 // If we proceed, the jitted call will not compile!
1106 if (columnList.empty()) {
1107 auto nEntries = *this->Count();
1108 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1109 return emptyRDF;
1110 }
1111
1112 std::stringstream cacheCall; // accumulates the C++ statement that will be jitted
1113 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1114 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1115 fColRegister);
1116 // build a string equivalent to
1117 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
1118 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0)); // NOTE(review): presumably a placeholder that the jitted assignment overwrites through its address - confirm
1119 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1121 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1122 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1123
1124 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1125
1126 const auto validColumnNames =
1127 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1128 const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), fDataSource,
1129 "Cache", /*vector2rvec=*/false);
1130 for (const auto &colType : colTypes) // the column types become the template arguments of the jitted Cache<...> call
1131 cacheCall << colType << ", ";
1132 if (!columnListWithoutSizeColumns.empty())
1133 cacheCall.seekp(-2, cacheCall.cur); // step back over the trailing ", " so that it gets overwritten
1134 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1135 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1136
1137 // book the code to jit with the RLoopManager and trigger the event loop
1138 fLoopManager->ToJitExec(cacheCall.str());
1139 fLoopManager->Jit();
1140
1141 return resRDF;
1142 }
1143
1144 ////////////////////////////////////////////////////////////////////////////
1145 /// \brief Save selected columns in memory.
1146 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1147 /// \return a `RDataFrame` that wraps the cached dataset.
1148 ///
1149 /// The existing columns are matched against the regular expression. If the string provided
1150 /// is empty, all columns are selected. See the previous overloads for more information.
1152 {
1153 const auto definedColumns = fColRegister.GetNames();
1154 auto *tree = fLoopManager->GetTree();
1155 const auto treeBranchNames =
1157 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1158 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1159 ColumnNames_t dsColumnsWithoutSizeColumns;
1160 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1161 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1162 ColumnNames_t columnNames;
1163 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1164 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1165 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1166 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1167 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1168 return Cache(selectedColumns);
1169 }
1170
1171 ////////////////////////////////////////////////////////////////////////////
1172 /// \brief Save selected columns in memory.
1173 /// \param[in] columnList columns to be cached in memory.
1174 /// \return a `RDataFrame` that wraps the cached dataset.
1175 ///
1176 /// See the previous overloads for more information.
1177 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1178 {
1179 ColumnNames_t selectedColumns(columnList);
1180 return Cache(selectedColumns);
1181 }
1182
1183 // clang-format off
1184 ////////////////////////////////////////////////////////////////////////////
1185 /// \brief Creates a node that filters entries based on range: [begin, end).
1186 /// \param[in] begin Initial entry number considered for this range.
1187 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1188 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1189 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1190 ///
1191 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1192 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1193 ///
1194 /// ### Example usage:
1195 /// ~~~{.cpp}
1196 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1197 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1198 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1199 /// ~~~
1200 // clang-format on
1201 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1202 {
1203 // check invariants
1204 if (stride == 0 || (end != 0 && end < begin))
1205 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1206 CheckIMTDisabled("Range");
1207
1209 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1210 RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister);
1211 return newInterface;
1212 }
1213
1214 // clang-format off
1215 ////////////////////////////////////////////////////////////////////////////
1216 /// \brief Creates a node that filters entries based on range.
1217 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1218 /// \return a node of the computation graph for which the range is defined.
1219 ///
1220 /// See the other Range overload for a detailed description.
1221 // clang-format on
1222 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1223
1224 // clang-format off
1225 ////////////////////////////////////////////////////////////////////////////
1226 /// \brief Execute a user-defined function on each entry (*instant action*).
1227 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1228 /// \param[in] columns Names of the columns/branches in input to the user function.
1229 ///
1230 /// The callable `f` is invoked once per entry. This is an *instant action*:
1231 /// upon invocation, an event loop as well as execution of all scheduled actions
1232 /// is triggered.
1233 /// Users are responsible for the thread-safety of this callable when executing
1234 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1235 ///
1236 /// ### Example usage:
1237 /// ~~~{.cpp}
1238 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1239 /// ~~~
1240 // clang-format on
1241 template <typename F>
1242 void Foreach(F f, const ColumnNames_t &columns = {})
1243 {
1244 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1245 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1246 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1247 }
1248
1249 // clang-format off
1250 ////////////////////////////////////////////////////////////////////////////
1251 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1252 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1253 /// \param[in] columns Names of the columns/branches in input to the user function.
1254 ///
1255 /// Same as `Foreach`, but the user-defined function takes an extra
1256 /// `unsigned int` as its first parameter, the *processing slot index*.
1257 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1258 /// for each thread of execution.
1259 /// This is meant as a helper in writing thread-safe `Foreach`
1260 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1261 /// The user-defined processing callable is able to follow different
1262 /// *streams of processing* indexed by the first parameter.
1263 /// `ForeachSlot` works just as well with single-thread execution: in that
1264 /// case `slot` will always be `0`.
1265 ///
1266 /// ### Example usage:
1267 /// ~~~{.cpp}
1268 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1269 /// ~~~
1270 // clang-format on
1271 template <typename F>
1272 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1273 {
1274 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>; // the callable's argument types minus the first one (the slot index)
1275 constexpr auto nColumns = ColTypes_t::list_size;
1276
1277 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1278 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1279
1280 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1282
1283 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister); // NOTE(review): `action` is not referenced again; presumably constructing it is what books it for the run below - confirm
1284
1285 fLoopManager->Run(); // instant action: the event loop runs right away
1286 }
1287
1288 // clang-format off
1289 ////////////////////////////////////////////////////////////////////////////
1290 /// \brief Execute a user-defined reduce operation on the values of a column.
1291 /// \tparam F The type of the reduce callable. Automatically deduced.
1292 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1293 /// \param[in] f A callable with signature `T(T,T)`
1294 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1295 /// \return the reduced quantity wrapped in a ROOT::RDF:RResultPtr.
1296 ///
1297 /// A reduction takes two values of a column and merges them into one (e.g.
1298 /// by summing them, taking the maximum, etc). This action performs the
1299 /// specified reduction operation on all processed column values, returning
1300 /// a single value of the same type. The callable f must satisfy the general
1301 /// requirements of a *processing function* besides having signature `T(T,T)`
1302 /// where `T` is the type of column columnName.
1303 ///
1304 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1305 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1306 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1307 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1308 /// overload.
1309 ///
1310 /// ### Example usage:
1311 /// ~~~{.cpp}
1312 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1313 /// ~~~
1314 ///
1315 /// This action is *lazy*: upon invocation of this method the calculation is
1316 /// booked but not executed. Also see RResultPtr.
1317 // clang-format on
1318 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1320 {
1321 static_assert(
1323 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1324 return Reduce(std::move(f), columnName, T());
1325 }
1326
1327 ////////////////////////////////////////////////////////////////////////////
1328 /// \brief Execute a user-defined reduce operation on the values of a column.
1329 /// \tparam F The type of the reduce callable. Automatically deduced.
1330 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1331 /// \param[in] f A callable with signature `T(T,T)`
1332 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1333 /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1334 /// \return the reduced quantity wrapped in a RResultPtr.
1335 ///
1336 /// ### Example usage:
1337 /// ~~~{.cpp}
1338 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1339 /// ~~~
1340 /// See the description of the first Reduce overload for more information.
1341 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1342 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1343 {
1344 return Aggregate(f, f, columnName, redIdentity);
1345 }
1346
1347 ////////////////////////////////////////////////////////////////////////////
1348 /// \brief Return the number of entries processed (*lazy action*).
1349 /// \return the number of entries wrapped in a RResultPtr.
1350 ///
1351 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1352 /// This action is *lazy*: upon invocation of this method the calculation is
1353 /// booked but not executed. Also see RResultPtr.
1354 ///
1355 /// ### Example usage:
1356 /// ~~~{.cpp}
1357 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1358 /// ~~~
1359 ///
1361 {
1362 const auto nSlots = fLoopManager->GetNSlots();
1363 auto cSPtr = std::make_shared<ULong64_t>(0);
1364 using Helper_t = RDFInternal::CountHelper;
1366 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1368 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1369 }
1370
1371 ////////////////////////////////////////////////////////////////////////////
1372 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1373 /// \tparam T The type of the column.
1374 /// \tparam COLL The type of collection used to store the values.
1375 /// \param[in] column The name of the column to collect the values of.
1376 /// \return the content of the selected column wrapped in a RResultPtr.
1377 ///
1378 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1379 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1380 /// ### Example usage:
1381 /// ~~~{.cpp}
1382 /// // In this case intCol is a std::vector<int>
1383 /// auto intCol = rdf.Take<int>("integerColumn");
1384 /// // Same content as above but in this case taken as a RVec<int>
1385 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1386 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1387 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1388 /// ~~~
1389 /// This action is *lazy*: upon invocation of this method the calculation is
1390 /// booked but not executed. Also see RResultPtr.
1391 template <typename T, typename COLL = std::vector<T>>
1393 {
1394 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1395
1396 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1397 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1398
1399 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1401 auto valuesPtr = std::make_shared<COLL>();
1402 const auto nSlots = fLoopManager->GetNSlots();
1403
1404 auto action =
1405 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1406 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1407 }
1408
1409 ////////////////////////////////////////////////////////////////////////////
1410 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1411 /// \tparam V The type of the column used to fill the histogram.
1412 /// \param[in] model The returned histogram will be constructed using this as a model.
1413 /// \param[in] vName The name of the column that will fill the histogram.
1414 /// \return the monodimensional histogram wrapped in a RResultPtr.
1415 ///
1416 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1417 /// is filled with each one of the elements of the container. In case multiple columns of container type
1418 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1419 /// possibly different lengths between events).
1420 /// This action is *lazy*: upon invocation of this method the calculation is
1421 /// booked but not executed. Also see RResultPtr.
1422 ///
1423 /// ### Example usage:
1424 /// ~~~{.cpp}
1425 /// // Deduce column type (this invocation needs jitting internally)
1426 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1427 /// // Explicit column type
1428 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1429 /// ~~~
1430 ///
1431 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1432 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1433 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1434 template <typename V = RDFDetail::RInferredType>
1435 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1436 {
1437 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1438
1439 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1440
1441 std::shared_ptr<::TH1D> h(nullptr);
1442 {
1443 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1444 h = model.GetHistogram();
1445 h->SetDirectory(nullptr);
1446 }
1447
1448 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1449 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1450 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr);
1451 }
1452
1453 ////////////////////////////////////////////////////////////////////////////
1454 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1455 /// \tparam V The type of the column used to fill the histogram.
1456 /// \param[in] vName The name of the column that will fill the histogram.
1457 /// \return the monodimensional histogram wrapped in a RResultPtr.
1458 ///
1459 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1460 /// The "name" and "title" strings are built starting from the input column name.
1461 /// See the description of the first Histo1D() overload for more details.
1462 ///
1463 /// ### Example usage:
1464 /// ~~~{.cpp}
1465 /// // Deduce column type (this invocation needs jitting internally)
1466 /// auto myHist1 = myDf.Histo1D("myColumn");
1467 /// // Explicit column type
1468 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1469 /// ~~~
1470 template <typename V = RDFDetail::RInferredType>
1472 {
1473 const auto h_name = std::string(vName);
1474 const auto h_title = h_name + ";" + h_name + ";count";
1475 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1476 }
1477
1478 ////////////////////////////////////////////////////////////////////////////
1479 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1480 /// \tparam V The type of the column used to fill the histogram.
1481 /// \tparam W The type of the column used as weights.
1482 /// \param[in] model The returned histogram will be constructed using this as a model.
1483 /// \param[in] vName The name of the column that will fill the histogram.
1484 /// \param[in] wName The name of the column that will provide the weights.
1485 /// \return the monodimensional histogram wrapped in a RResultPtr.
1486 ///
1487 /// See the description of the first Histo1D() overload for more details.
1488 ///
1489 /// ### Example usage:
1490 /// ~~~{.cpp}
1491 /// // Deduce column type (this invocation needs jitting internally)
1492 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1493 /// // Explicit column type
1494 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1495 /// ~~~
1496 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1498 {
1499 const std::vector<std::string_view> columnViews = {vName, wName};
1500 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1501 ? ColumnNames_t()
1502 : ColumnNames_t(columnViews.begin(), columnViews.end());
1503 std::shared_ptr<::TH1D> h(nullptr);
1504 {
1505 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1506 h = model.GetHistogram();
1507 }
1508 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr);
1509 }
1510
1511 ////////////////////////////////////////////////////////////////////////////
1512 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1513 /// \tparam V The type of the column used to fill the histogram.
1514 /// \tparam W The type of the column used as weights.
1515 /// \param[in] vName The name of the column that will fill the histogram.
1516 /// \param[in] wName The name of the column that will provide the weights.
1517 /// \return the monodimensional histogram wrapped in a RResultPtr.
1518 ///
1519 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1520 /// The "name" and "title" strings are built starting from the input column names.
1521 /// See the description of the first Histo1D() overload for more details.
1522 ///
1523 /// ### Example usage:
1524 /// ~~~{.cpp}
1525 /// // Deduce column types (this invocation needs jitting internally)
1526 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1527 /// // Explicit column types
1528 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1529 /// ~~~
1530 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1532 {
1533 // We build name and title based on the value and weight column names
1534 std::string str_vName{vName};
1535 std::string str_wName{wName};
1536 const auto h_name = str_vName + "_weighted_" + str_wName;
1537 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1538 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1539 }
1540
1541 ////////////////////////////////////////////////////////////////////////////
1542 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1543 /// \tparam V The type of the column used to fill the histogram.
1544 /// \tparam W The type of the column used as weights.
1545 /// \param[in] model The returned histogram will be constructed using this as a model.
1546 /// \return the monodimensional histogram wrapped in a RResultPtr.
1547 ///
1548 /// This overload will use the first two default columns as column names.
1549 /// See the description of the first Histo1D() overload for more details.
1550 template <typename V, typename W>
1551 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1552 {
1553 return Histo1D<V, W>(model, "", "");
1554 }
1555
1556 ////////////////////////////////////////////////////////////////////////////
1557 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1558 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1559 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1560 /// \param[in] model The returned histogram will be constructed using this as a model.
1561 /// \param[in] v1Name The name of the column that will fill the x axis.
1562 /// \param[in] v2Name The name of the column that will fill the y axis.
1563 /// \return the bidimensional histogram wrapped in a RResultPtr.
1564 ///
1565 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1566 /// is filled with each one of the elements of the container. In case multiple columns of container type
1567 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1568 /// possibly different lengths between events).
1569 /// This action is *lazy*: upon invocation of this method the calculation is
1570 /// booked but not executed. Also see RResultPtr.
1571 ///
1572 /// ### Example usage:
1573 /// ~~~{.cpp}
1574 /// // Deduce column types (this invocation needs jitting internally)
1575 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1576 /// // Explicit column types
1577 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1578 /// ~~~
1579 ///
1580 ///
1581 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1582 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1583 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1584 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1586 {
1587 std::shared_ptr<::TH2D> h(nullptr);
1588 {
1589 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1590 h = model.GetHistogram();
1591 }
1592 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1593 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1594 }
1595 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1596 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1597 ? ColumnNames_t()
1598 : ColumnNames_t(columnViews.begin(), columnViews.end());
1599 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr);
1600 }
1601
1602 ////////////////////////////////////////////////////////////////////////////
1603 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1604 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1605 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1606 /// \tparam W The type of the column used for the weights of the histogram.
1607 /// \param[in] model The returned histogram will be constructed using this as a model.
1608 /// \param[in] v1Name The name of the column that will fill the x axis.
1609 /// \param[in] v2Name The name of the column that will fill the y axis.
1610 /// \param[in] wName The name of the column that will provide the weights.
1611 /// \return the bidimensional histogram wrapped in a RResultPtr.
1612 ///
1613 /// This action is *lazy*: upon invocation of this method the calculation is
1614 /// booked but not executed. Also see RResultPtr.
1615 ///
1616 /// ### Example usage:
1617 /// ~~~{.cpp}
1618 /// // Deduce column types (this invocation needs jitting internally)
1619 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1620 /// // Explicit column types
1621 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1622 /// ~~~
1623 ///
1624 /// See the documentation of the first Histo2D() overload for more details.
1625 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1626 typename W = RDFDetail::RInferredType>
1629 {
1630 std::shared_ptr<::TH2D> h(nullptr);
1631 {
1632 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1633 h = model.GetHistogram();
1634 }
1635 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1636 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1637 }
1638 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1639 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1640 ? ColumnNames_t()
1641 : ColumnNames_t(columnViews.begin(), columnViews.end());
1642 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
1643 }
1644
1645 template <typename V1, typename V2, typename W>
1647 {
1648 return Histo2D<V1, V2, W>(model, "", "", "");
1649 }
1650
1651 ////////////////////////////////////////////////////////////////////////////
1652 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1653 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1654 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1655 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1656 /// \param[in] model The returned histogram will be constructed using this as a model.
1657 /// \param[in] v1Name The name of the column that will fill the x axis.
1658 /// \param[in] v2Name The name of the column that will fill the y axis.
1659 /// \param[in] v3Name The name of the column that will fill the z axis.
1660 /// \return the tridimensional histogram wrapped in a RResultPtr.
1661 ///
1662 /// This action is *lazy*: upon invocation of this method the calculation is
1663 /// booked but not executed. Also see RResultPtr.
1664 ///
1665 /// ### Example usage:
1666 /// ~~~{.cpp}
1667 /// // Deduce column types (this invocation needs jitting internally)
1668 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1669 /// "myValueX", "myValueY", "myValueZ");
1670 /// // Explicit column types
1671 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1672 /// "myValueX", "myValueY", "myValueZ");
1673 /// ~~~
1674 ///
1675 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1676 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1677 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1678 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1679 typename V3 = RDFDetail::RInferredType>
1681 std::string_view v3Name = "")
1682 {
1683 std::shared_ptr<::TH3D> h(nullptr);
1684 {
1685 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1686 h = model.GetHistogram();
1687 }
1688 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1689 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1690 }
1691 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1692 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1693 ? ColumnNames_t()
1694 : ColumnNames_t(columnViews.begin(), columnViews.end());
1695 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
1696 }
1697
1698 ////////////////////////////////////////////////////////////////////////////
1699 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1700 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1701 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1702 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1703 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1704 /// \param[in] model The returned histogram will be constructed using this as a model.
1705 /// \param[in] v1Name The name of the column that will fill the x axis.
1706 /// \param[in] v2Name The name of the column that will fill the y axis.
1707 /// \param[in] v3Name The name of the column that will fill the z axis.
1708 /// \param[in] wName The name of the column that will provide the weights.
1709 /// \return the tridimensional histogram wrapped in a RResultPtr.
1710 ///
1711 /// This action is *lazy*: upon invocation of this method the calculation is
1712 /// booked but not executed. Also see RResultPtr.
1713 ///
1714 /// ### Example usage:
1715 /// ~~~{.cpp}
1716 /// // Deduce column types (this invocation needs jitting internally)
1717 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1718 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1719 /// // Explicit column types
1720 /// using d_t = double;
1721 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1722 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1723 /// ~~~
1724 ///
1725 ///
1726 /// See the documentation of the first Histo2D() overload for more details.
1727 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1728 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1730 std::string_view v3Name, std::string_view wName)
1731 {
1732 std::shared_ptr<::TH3D> h(nullptr);
1733 {
1734 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1735 h = model.GetHistogram();
1736 }
1737 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1738 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1739 }
1740 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1741 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1742 ? ColumnNames_t()
1743 : ColumnNames_t(columnViews.begin(), columnViews.end());
1744 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
1745 }
1746
1747 template <typename V1, typename V2, typename V3, typename W>
1749 {
1750 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1751 }
1752
1753 ////////////////////////////////////////////////////////////////////////////
1754 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1755 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
1756 /// present.
1757 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
1758 /// object.
1759 /// \param[in] model The returned histogram will be constructed using this as a model.
1760 /// \param[in] columnList
1761 /// A list containing the names of the columns that will be passed when calling `Fill`.
1762 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1763 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1764 ///
1765 /// This action is *lazy*: upon invocation of this method the calculation is
1766 /// booked but not executed. See RResultPtr documentation.
1767 ///
1768 /// ### Example usage:
1769 /// ~~~{.cpp}
1770 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
1771 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1772 /// {"col0", "col1", "col2", "col3"});
1773 /// ~~~
1774 ///
1775 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
1776 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1777 {
1778 std::shared_ptr<::THnD> h(nullptr);
1779 {
1780 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1781 h = model.GetHistogram();
1782
1783 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1784 h->Sumw2();
1785 } else if (int(columnList.size()) != h->GetNdimensions()) {
1786 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1787 }
1788 }
1789 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h,
1790 fProxiedPtr);
1791 }
1792
1793 ////////////////////////////////////////////////////////////////////////////
1794 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1795 /// \param[in] model The returned histogram will be constructed using this as a model.
1796 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1797 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1798 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1799 ///
1800 /// This action is *lazy*: upon invocation of this method the calculation is
1801 /// booked but not executed. Also see RResultPtr.
1802 ///
1803 /// ### Example usage:
1804 /// ~~~{.cpp}
1805 /// auto myFilledObj = myDf.HistoND({"name","title", 4,
1806 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1807 /// {"col0", "col1", "col2", "col3"});
1808 /// ~~~
1809 ///
1810 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1811 {
1812 std::shared_ptr<::THnD> h(nullptr);
1813 {
1814 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1815 h = model.GetHistogram();
1816
1817 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1818 h->Sumw2();
1819 } else if (int(columnList.size()) != h->GetNdimensions()) {
1820 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1821 }
1822 }
1823 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr,
1824 columnList.size());
1825 }
1826
1827 ////////////////////////////////////////////////////////////////////////////
1828 /// \brief Fill and return a TGraph object (*lazy action*).
1829 /// \tparam X The type of the column used to fill the x axis.
1830 /// \tparam Y The type of the column used to fill the y axis.
1831 /// \param[in] x The name of the column that will fill the x axis.
1832 /// \param[in] y The name of the column that will fill the y axis.
1833 /// \return the TGraph wrapped in a RResultPtr.
1834 ///
1835 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
1836 /// is filled with each one of the elements of the container.
1837 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1838 /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing.
1839 /// A name and a title to the TGraph is given based on the input column names.
1840 ///
1841 /// This action is *lazy*: upon invocation of this method the calculation is
1842 /// booked but not executed. Also see RResultPtr.
1843 ///
1844 /// ### Example usage:
1845 /// ~~~{.cpp}
1846 /// // Deduce column types (this invocation needs jitting internally)
1847 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1848 /// // Explicit column types
1849 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1850 /// ~~~
1851 ///
1852 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
1853 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1854 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1855 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
1857 {
1858 auto graph = std::make_shared<::TGraph>();
1859 const std::vector<std::string_view> columnViews = {x, y};
1860 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1861 ? ColumnNames_t()
1862 : ColumnNames_t(columnViews.begin(), columnViews.end());
1863
1864 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1865
1866 // We build a default name and title based on the input columns
1867 const auto g_name = validatedColumns[0] + "_vs_" + validatedColumns[1];
1868 const auto g_title = validatedColumns[0] + " vs " + validatedColumns[1];
1869 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1870 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
1871 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
1872
1873 return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr);
1874 }
1875
1876 ////////////////////////////////////////////////////////////////////////////
1877 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
1878 /// \param[in] x The name of the column that will fill the x axis.
1879 /// \param[in] y The name of the column that will fill the y axis.
1880 /// \param[in] exl The name of the column of X low errors
1881 /// \param[in] exh The name of the column of X high errors
1882 /// \param[in] eyl The name of the column of Y low errors
1883 /// \param[in] eyh The name of the column of Y high errors
1884 /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
1885 ///
1886 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1887 /// is filled with each one of the elements of the container.
1888 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1889 ///
1890 /// This action is *lazy*: upon invocation of this method the calculation is
1891 /// booked but not executed. Also see RResultPtr.
1892 ///
1893 /// ### Example usage:
1894 /// ~~~{.cpp}
1895 /// // Deduce column types (this invocation needs jitting internally)
1896 /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
1897 /// // Explicit column types
1898 /// using f = float;
1899 /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
1900 /// ~~~
1901 ///
1902 /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
1903 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1904 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1905 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
1906 typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
1907 typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
// NOTE(review): listing lines 1908-1909 (return type, method name and the x, y, exl parameters) are absent from this copy.
1910 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
1911 {
// Empty TGraphAsymmErrors; it is named and titled below and then handed to CreateAction.
1912 auto graph = std::make_shared<::TGraphAsymmErrors>();
1913 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
// If at least one of the six names is empty, an empty list is used instead of a partially specified one.
1914 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1915 ? ColumnNames_t()
1916 : ColumnNames_t(columnViews.begin(), columnViews.end());
1917
// Exactly six column names are requested, in the order x, y, exl, exh, eyl, eyh.
1918 const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
1919
1920 // Default name, title and axis titles use only the x and y columns (indices 0 and 1); the error columns are not part of them
1921 const auto g_name = validatedColumns[0] + "_vs_" + validatedColumns[1];
1922 const auto g_title = validatedColumns[0] + " vs " + validatedColumns[1];
1923 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1924 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
1925 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
1926
// NOTE(review): listing line 1928 (the remaining CreateAction arguments) is absent from this copy.
1927 return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
1929 }
1930
1931 ////////////////////////////////////////////////////////////////////////////
1932 /// \brief Fill and return a one-dimensional profile (*lazy action*).
1933 /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present.
1934 /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present.
1935 /// \param[in] model The model to be considered to build the new return value.
1936 /// \param[in] v1Name The name of the column that will fill the x axis.
1937 /// \param[in] v2Name The name of the column that will fill the y axis.
1938 /// \return the monodimensional profile wrapped in a RResultPtr.
1939 ///
1940 /// This action is *lazy*: upon invocation of this method the calculation is
1941 /// booked but not executed. Also see RResultPtr.
1942 ///
1943 /// ### Example usage:
1944 /// ~~~{.cpp}
1945 /// // Deduce column types (this invocation needs jitting internally)
1946 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1947 /// // Explicit column types
1948 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1949 /// ~~~
1950 ///
1951 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
1952 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1953 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1954 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
// NOTE(review): listing line 1955 (the return type) is absent from this copy.
1956 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1957 {
1958 std::shared_ptr<::TProfile> h(nullptr);
1959 {
// `iel` exists only inside this inner scope, i.e. only while model.GetProfile() runs.
// NOTE(review): presumably it silences messages up to kError during that call - confirm.
1960 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1961 h = model.GetProfile();
1962 }
1963
// Profiles for which HasAxisLimits reports false are rejected with an exception.
1964 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1965 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1966 }
1967 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
// If at least one name is empty, an empty list is forwarded; this method itself does not call
// GetValidatedColumnNames on it.
1968 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1969 ? ColumnNames_t()
1970 : ColumnNames_t(columnViews.begin(), columnViews.end());
1971 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr);
1972 }
1973
1974 ////////////////////////////////////////////////////////////////////////////
1975 /// \brief Fill and return a one-dimensional profile (*lazy action*).
1976 /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present.
1977 /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present.
1978 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1979 /// \param[in] model The model to be considered to build the new return value.
1980 /// \param[in] v1Name The name of the column that will fill the x axis.
1981 /// \param[in] v2Name The name of the column that will fill the y axis.
1982 /// \param[in] wName The name of the column that will provide the weights.
1983 /// \return the monodimensional profile wrapped in a RResultPtr.
1984 ///
1985 /// This action is *lazy*: upon invocation of this method the calculation is
1986 /// booked but not executed. Also see RResultPtr.
1987 ///
1988 /// ### Example usage:
1989 /// ~~~{.cpp}
1990 /// // Deduce column types (this invocation needs jitting internally)
1991 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1992 /// // Explicit column types
1993 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1994 /// "xValues", "yValues", "weight");
1995 /// ~~~
1996 ///
1997 /// See the first Profile1D() overload for more details.
1998 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1999 typename W = RDFDetail::RInferredType>
// NOTE(review): listing lines 2000-2001 (return type, method name and parameters) are absent from this copy.
2002 {
2003 std::shared_ptr<::TProfile> h(nullptr);
2004 {
// `iel` exists only inside this inner scope, i.e. only while model.GetProfile() runs.
// NOTE(review): presumably it silences messages up to kError during that call - confirm.
2005 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2006 h = model.GetProfile();
2007 }
2008
// Profiles for which HasAxisLimits reports false are rejected with an exception.
2009 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2010 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2011 }
// Column order is x values, y values, weights; it matches the V1, V2, W template arguments below.
2012 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
// If at least one name is empty, an empty list is forwarded instead of a partially specified one.
2013 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2014 ? ColumnNames_t()
2015 : ColumnNames_t(columnViews.begin(), columnViews.end());
2016 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2017 }
2018
2019 ////////////////////////////////////////////////////////////////////////////
2020 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2021 /// See the first Profile1D() overload for more details.
// None of the three template parameters has a default here, so callers must spell out V1, V2 and W.
2022 template <typename V1, typename V2, typename W>
// NOTE(review): listing line 2023 (return type, method name and the model parameter) is absent from this copy.
2024 {
// Delegates to the weighted overload with all three column names empty.
2025 return Profile1D<V1, V2, W>(model, "", "", "");
2026 }
2027
2028 ////////////////////////////////////////////////////////////////////////////
2029 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2030 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2031 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2032 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2033 /// \param[in] model The returned profile will be constructed using this as a model.
2034 /// \param[in] v1Name The name of the column that will fill the x axis.
2035 /// \param[in] v2Name The name of the column that will fill the y axis.
2036 /// \param[in] v3Name The name of the column that will fill the z axis.
2037 /// \return the bidimensional profile wrapped in a RResultPtr.
2038 ///
2039 /// This action is *lazy*: upon invocation of this method the calculation is
2040 /// booked but not executed. Also see RResultPtr.
2041 ///
2042 /// ### Example usage:
2043 /// ~~~{.cpp}
2044 /// // Deduce column types (this invocation needs jitting internally)
2045 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2046 /// "xValues", "yValues", "zValues");
2047 /// // Explicit column types
2048 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2049 /// "xValues", "yValues", "zValues");
2050 /// ~~~
2051 ///
2052 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2053 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2054 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2055 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2056 typename V3 = RDFDetail::RInferredType>
// NOTE(review): listing line 2057 (return type, method name and the model/v1Name parameters) is absent from this copy.
2058 std::string_view v2Name = "", std::string_view v3Name = "")
2059 {
2060 std::shared_ptr<::TProfile2D> h(nullptr);
2061 {
// `iel` exists only inside this inner scope, i.e. only while model.GetProfile() runs.
// NOTE(review): presumably it silences messages up to kError during that call - confirm.
2062 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2063 h = model.GetProfile();
2064 }
2065
// Profiles for which HasAxisLimits reports false are rejected with an exception.
2066 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2067 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2068 }
// Column order is x, y, z; it matches the V1, V2, V3 template arguments below.
2069 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
// If at least one name is empty, an empty list is forwarded instead of a partially specified one.
2070 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2071 ? ColumnNames_t()
2072 : ColumnNames_t(columnViews.begin(), columnViews.end());
2073 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2074 }
2075
2076 ////////////////////////////////////////////////////////////////////////////
2077 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2078 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2079 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2080 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2081 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2082 /// \param[in] model The returned profile will be constructed using this as a model.
2083 /// \param[in] v1Name The name of the column that will fill the x axis.
2084 /// \param[in] v2Name The name of the column that will fill the y axis.
2085 /// \param[in] v3Name The name of the column that will fill the z axis.
2086 /// \param[in] wName The name of the column that will provide the weights.
2087 /// \return the bidimensional profile wrapped in a RResultPtr.
2088 ///
2089 /// This action is *lazy*: upon invocation of this method the calculation is
2090 /// booked but not executed. Also see RResultPtr.
2091 ///
2092 /// ### Example usage:
2093 /// ~~~{.cpp}
2094 /// // Deduce column types (this invocation needs jitting internally)
2095 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2096 /// "xValues", "yValues", "zValues", "weight");
2097 /// // Explicit column types
2098 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2099 /// "xValues", "yValues", "zValues", "weight");
2100 /// ~~~
2101 ///
2102 /// See the first Profile2D() overload for more details.
2103 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2104 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 2105 (return type, method name and the first parameters) is absent from this copy.
// The two parameters visible below have no default value.
2106 std::string_view v3Name, std::string_view wName)
2107 {
2108 std::shared_ptr<::TProfile2D> h(nullptr);
2109 {
// `iel` exists only inside this inner scope, i.e. only while model.GetProfile() runs.
// NOTE(review): presumably it silences messages up to kError during that call - confirm.
2110 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2111 h = model.GetProfile();
2112 }
2113
// Profiles for which HasAxisLimits reports false are rejected with an exception.
2114 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2115 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2116 }
// Column order is x, y, z, weights; it matches the V1, V2, V3, W template arguments below.
2117 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
// If at least one name is empty, an empty list is forwarded instead of a partially specified one.
2118 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2119 ? ColumnNames_t()
2120 : ColumnNames_t(columnViews.begin(), columnViews.end());
2121 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2122 }
2123
2124 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2125 /// See the first Profile2D() overload for more details.
// None of the four template parameters has a default here, so callers must spell out V1, V2, V3 and W.
2126 template <typename V1, typename V2, typename V3, typename W>
// NOTE(review): listing line 2127 (return type, method name and the model parameter) is absent from this copy.
2128 {
// Delegates to the weighted overload with all four column names empty.
2129 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2130 }
2131
2132 ////////////////////////////////////////////////////////////////////////////
2133 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2134 ///
2135 /// Type T must provide at least:
2136 /// - a copy-constructor
2137 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2138 /// (these types can also be passed as template parameters to this method)
2139 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2140 /// objects passed as argument into the object on which `Merge` was called (analogous to TH1::Merge). Note that
2141 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2142 /// the TCollection*).
2143 ///
2144 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2145 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2146 /// \tparam T The type of the object to fill. Automatically deduced.
2147 /// \param[in] model The model to be considered to build the new return value.
2148 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2149 /// \return the filled object wrapped in a RResultPtr.
2150 ///
2151 /// The user gives up ownership of the model object.
2152 /// The list of column names to be used for filling must always be specified.
2153 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2154 /// Also see RResultPtr.
2155 ///
2156 /// ### Example usage:
2157 /// ~~~{.cpp}
2158 /// MyClass obj;
2159 /// // Deduce column types (this invocation needs jitting internally, and in this case
2160 /// // MyClass needs to be known to the interpreter)
2161 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2162 /// // explicit column types
2163 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2164 /// ~~~
2165 ///
// T comes last so that it is deduced from `model`; explicit template arguments bind to FirstColumn/OtherColumns.
2166 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2167 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2168 {
// The model is copied or moved (depending on its value category) into a shared object of the decayed type.
2169 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
// NOTE(review): HistoUtils is instantiated with T, not std::decay_t<T>. T is deduced from a forwarding
// reference, so it is an lvalue-reference type when `model` is an lvalue, while *h always has the decayed
// type. Confirm that HistoUtils<T> selects the intended implementation for reference types.
2170 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2171 throw std::runtime_error("The absence of axes limits is not supported yet.");
2172 }
// Besides the usual arguments, the number of requested columns is forwarded as the last argument.
2173 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr,
2174 columnList.size());
2175 }
2176
2177 ////////////////////////////////////////////////////////////////////////////
2178 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2179 ///
2180 /// \tparam V The type of the value column
2181 /// \param[in] value The name of the column with the values to fill the statistics with.
2182 /// \return the filled TStatistic object wrapped in a RResultPtr.
2183 ///
2184 /// ### Example usage:
2185 /// ~~~{.cpp}
2186 /// // Deduce column type (this invocation needs jitting internally)
2187 /// auto stats0 = myDf.Stats("values");
2188 /// // Explicit column type
2189 /// auto stats1 = myDf.Stats<float>("values");
2190 /// ~~~
2191 ///
2192 template <typename V = RDFDetail::RInferredType>
// NOTE(review): listing line 2193 (return type, method name and the `value` parameter) is absent from this copy.
2194 {
// An empty `value` leaves the column list empty; otherwise it holds exactly that one name.
2195 ColumnNames_t columns;
2196 if (!value.empty()) {
2197 columns.emplace_back(std::string(value));
2198 }
2199 const auto validColumnNames = GetValidatedColumnNames(1, columns);
// NOTE(review): listing line 2200 (the condition opening this if/else) is absent from this copy.
// The first branch calls Fill without explicit column types, the second one with the explicit type V.
2201 return Fill(TStatistic(), validColumnNames);
2202 } else {
2203 return Fill<V>(TStatistic(), validColumnNames);
2204 }
2205 }
2206
2207 ////////////////////////////////////////////////////////////////////////////
2208 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2209 ///
2210 /// \tparam V The type of the value column
2211 /// \tparam W The type of the weight column
2212 /// \param[in] value The name of the column with the values to fill the statistics with.
2213 /// \param[in] weight The name of the column with the weights to fill the statistics with.
2214 /// \return the filled TStatistic object wrapped in a RResultPtr.
2215 ///
2216 /// ### Example usage:
2217 /// ~~~{.cpp}
2218 /// // Deduce column types (this invocation needs jitting internally)
2219 /// auto stats0 = myDf.Stats("values", "weights");
2220 /// // Explicit column types
2221 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2222 /// ~~~
2223 ///
2224 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 2225 (return type, method name and the value/weight parameters) is absent from this copy.
2226 {
// Both names are always forwarded, even if empty; exactly two validated column names are requested.
2227 ColumnNames_t columns{std::string(value), std::string(weight)};
2228 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2229 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2230 const auto validColumnNames = GetValidatedColumnNames(2, columns);
2231 // We have 3 cases:
2232 // 1. Both types are inferred: we use Fill and let the jit kick in.
2233 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2234 // 3. Both types are explicit: we invoke the fully compiled Fill method.
2235 if (vIsInferred && wIsInferred) {
2236 return Fill(TStatistic(), validColumnNames);
2237 } else if (vIsInferred != wIsInferred) {
// Exactly one type is inferred here: if the value type is the inferred one, the weight type is the
// explicit one, and vice versa. The message must name them in that order.
2238 std::string error("The ");
2239 error += vIsInferred ? "weight " : "value ";
2240 error += "column type is explicit, while the ";
2241 error += vIsInferred ? "value " : "weight ";
2242 error += "is specified to be inferred. This case is not supported: please specify both types or none.";
2243 throw std::runtime_error(error);
2244 } else {
2245 return Fill<V, W>(TStatistic(), validColumnNames);
2246 }
2247 }
2248
2249 ////////////////////////////////////////////////////////////////////////////
2250 /// \brief Return the minimum of processed column values (*lazy action*).
2251 /// \tparam T The type of the branch/column.
2252 /// \param[in] columnName The name of the branch/column to be treated.
2253 /// \return the minimum value of the selected column wrapped in a RResultPtr.
2254 ///
2255 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2256 /// template specialization of this method.
2257 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2258 ///
2259 /// This action is *lazy*: upon invocation of this method the calculation is
2260 /// booked but not executed. Also see RResultPtr.
2261 ///
2262 /// ### Example usage:
2263 /// ~~~{.cpp}
2264 /// // Deduce column type (this invocation needs jitting internally)
2265 /// auto minVal0 = myDf.Min("values");
2266 /// // Explicit column type
2267 /// auto minVal1 = myDf.Min<double>("values");
2268 /// ~~~
2269 ///
2270 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2271 (return type, method name and the columnName parameter) is absent from this copy.
2272 {
// An empty columnName yields an empty column list; otherwise the list holds exactly that name.
2273 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2274 using RetType_t = RDFDetail::MinReturnType_t<T>;
// The result starts at the largest finite value representable by RetType_t.
2275 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2276 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr);
2277 }
2278
2279 ////////////////////////////////////////////////////////////////////////////
2280 /// \brief Return the maximum of processed column values (*lazy action*).
2281 /// \tparam T The type of the branch/column.
2282 /// \param[in] columnName The name of the branch/column to be treated.
2283 /// \return the maximum value of the selected column wrapped in a RResultPtr.
2284 ///
2285 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2286 /// template specialization of this method.
2287 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2288 ///
2289 /// This action is *lazy*: upon invocation of this method the calculation is
2290 /// booked but not executed. Also see RResultPtr.
2291 ///
2292 /// ### Example usage:
2293 /// ~~~{.cpp}
2294 /// // Deduce column type (this invocation needs jitting internally)
2295 /// auto maxVal0 = myDf.Max("values");
2296 /// // Explicit column type
2297 /// auto maxVal1 = myDf.Max<double>("values");
2298 /// ~~~
2299 ///
2300 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2301 (return type, method name and the columnName parameter) is absent from this copy.
2302 {
// An empty columnName yields an empty column list; otherwise the list holds exactly that name.
2303 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2304 using RetType_t = RDFDetail::MaxReturnType_t<T>;
// The result starts at lowest(), the most negative finite value. min() would be wrong for floating-point
// types, where it is the smallest positive normal value.
2305 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2306 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr);
2307 }
2308
2309 ////////////////////////////////////////////////////////////////////////////
2310 /// \brief Return the mean of processed column values (*lazy action*).
2311 /// \tparam T The type of the branch/column.
2312 /// \param[in] columnName The name of the branch/column to be treated.
2313 /// \return the mean value of the selected column wrapped in a RResultPtr.
2314 ///
2315 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2316 /// template specialization of this method.
2317 ///
2318 /// This action is *lazy*: upon invocation of this method the calculation is
2319 /// booked but not executed. Also see RResultPtr.
2320 ///
2321 /// ### Example usage:
2322 /// ~~~{.cpp}
2323 /// // Deduce column type (this invocation needs jitting internally)
2324 /// auto meanVal0 = myDf.Mean("values");
2325 /// // Explicit column type
2326 /// auto meanVal1 = myDf.Mean<double>("values");
2327 /// ~~~
2328 ///
2329 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2330 (return type, method name and the columnName parameter) is absent from this copy.
2331 {
// An empty columnName yields an empty column list; otherwise the list holds exactly that name.
2332 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The result is always stored as a double initialised to 0, whatever the column type T is.
2333 auto meanV = std::make_shared<double>(0);
2334 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr);
2335 }
2336
2337 ////////////////////////////////////////////////////////////////////////////
2338 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2339 /// \tparam T The type of the branch/column.
2340 /// \param[in] columnName The name of the branch/column to be treated.
2341 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2342 ///
2343 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2344 /// template specialization of this method.
2345 ///
2346 /// This action is *lazy*: upon invocation of this method the calculation is
2347 /// booked but not executed. Also see RResultPtr.
2348 ///
2349 /// ### Example usage:
2350 /// ~~~{.cpp}
2351 /// // Deduce column type (this invocation needs jitting internally)
2352 /// auto stdDev0 = myDf.StdDev("values");
2353 /// // Explicit column type
2354 /// auto stdDev1 = myDf.StdDev<double>("values");
2355 /// ~~~
2356 ///
2357 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2358 (return type, method name and the columnName parameter) is absent from this copy.
2359 {
// An empty columnName yields an empty column list; otherwise the list holds exactly that name.
2360 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The result is always stored as a double initialised to 0, whatever the column type T is.
2361 auto stdDeviationV = std::make_shared<double>(0);
2362 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr);
2363 }
2364
2365 // clang-format off
2366 ////////////////////////////////////////////////////////////////////////////
2367 /// \brief Return the sum of processed column values (*lazy action*).
2368 /// \tparam T The type of the branch/column.
2369 /// \param[in] columnName The name of the branch/column.
2370 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2371 /// \return the sum of the selected column wrapped in a RResultPtr.
2372 ///
2373 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2374 /// template specialization of this method.
2375 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2376 ///
2377 /// This action is *lazy*: upon invocation of this method the calculation is
2378 /// booked but not executed. Also see RResultPtr.
2379 ///
2380 /// ### Example usage:
2381 /// ~~~{.cpp}
2382 /// // Deduce column type (this invocation needs jitting internally)
2383 /// auto sum0 = myDf.Sum("values");
2384 /// // Explicit column type
2385 /// auto sum1 = myDf.Sum<double>("values");
2386 /// ~~~
2387 ///
2388 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2389 (the return type) is absent from this copy.
2390 Sum(std::string_view columnName = "",
// The default initial value is a value-initialised SumReturnType_t<T>.
2391 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2392 {
// An empty columnName yields an empty column list; otherwise the list holds exactly that name.
2393 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The accumulator is a copy of initValue, so the caller's object is not modified through this pointer.
2394 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2395 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr);
2396 }
2397 // clang-format on
2398
2399 ////////////////////////////////////////////////////////////////////////////
2400 /// \brief Gather filtering statistics.
2401 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2402 ///
2403 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2404 /// all named filters in the call graph. Calling this method on a
2405 /// stored chain state (i.e. a graph node different from the first) gathers
2406 /// the stats for all named filters in the chain section between the original
2407 /// `RDataFrame` and that node (included). Stats are gathered in the same
2408 /// order as the named filters have been added to the graph.
2409 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2410 /// effects cuts had.
2411 ///
2412 /// This action is *lazy*: upon invocation of
2413 /// this method the calculation is booked but not executed. See RResultPtr
2414 /// documentation.
2415 ///
2416 /// ### Example usage:
2417 /// ~~~{.cpp}
2418 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2419 /// auto cutReport = filtered.Report();
2420 /// cutReport->Print();
2421 /// ~~~
2422 ///
// NOTE(review): listing line 2423 (return type and method name) is absent from this copy.
2424 {
2425 bool returnEmptyReport = false;
2426 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2427 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2428 // certainly does not contain named filters.
2429 // The number 4 takes into account the implicit columns for entry and slot number
2430 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
// NOTE(review): listing line 2431 (the `if` condition guarding the next statement) is absent from this copy.
2432 returnEmptyReport = true;
2433
// `rep` is the report object: it is given to the helper and is also what the returned result wraps.
2434 auto rep = std::make_shared<RCutFlowReport>();
2435 using Helper_t = RDFInternal::ReportHelper<Proxied>;
// NOTE(review): listing line 2436 (presumably the Action_t alias used below) is absent from this copy.
2437
// The action is created with an empty column list.
// NOTE(review): listing line 2439 (the remaining constructor arguments) is absent from this copy.
2438 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}),
2440
// Ownership of the action is transferred to MakeResultPtr.
2441 return MakeResultPtr(rep, *fLoopManager, std::move(action));
2442 }
2443
2444 /// \brief Returns the names of the filters created.
2445 /// \return the container of filters names.
2446 ///
2447 /// If called on a root node, all the filters in the computation graph will
2448 /// be printed. For any other node, only the filters upstream of that node.
2449 /// Filters without a name are printed as "Unnamed Filter"
2450 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2451 ///
2452 /// ### Example usage:
2453 /// ~~~{.cpp}
2454 /// auto filtNames = d.GetFilterNames();
2455 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2456 /// ~~~
2457 ///
// Thin forwarder: the names are collected by RDFInternal::GetFilterNames, called with the proxied node pointer.
2458 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2459
2460 // clang-format off
2461 ////////////////////////////////////////////////////////////////////////////
2462 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2463 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2464 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2465 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2466 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2467 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2468 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2469 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
2470 /// \return the result of the aggregation wrapped in a RResultPtr.
2471 ///
2472 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2473 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2474 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2475 /// the value of the column columnName.
2476 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2477 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2478 ///
2479 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2480 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2481 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2482 ///
2483 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2484 ///
2485 /// Example usage:
2486 /// ~~~{.cpp}
2487 /// auto aggregator = [](double acc, double x) { return acc * x; };
2488 /// ROOT::EnableImplicitMT();
2489 /// // If multithread is enabled, the aggregator function will be called by more threads
2490 /// // and will produce a vector of partial accumulators.
2491 /// // The merger function performs the final aggregation of these partial results.
2492 /// auto merger = [](std::vector<double> &accumulators) {
2493 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2494 /// accumulators[0] *= accumulators[i];
2495 /// }
2496 /// };
2497 ///
2498 /// // The accumulator is initialized at this value by every thread.
2499 /// double initValue = 1.;
2500 ///
2501 /// // Multiplies all elements of the column "x"
2502 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2503 /// ~~~
2504 // clang-format on
// Books a user-defined aggregation whose accumulator is seeded with aggIdentity.
// U is deduced as the first parameter type of the aggregator callable, T as its second
// parameter type (the type of the column being aggregated).
2505 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2506 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2507 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2508 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2509 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2510 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2511 {
// Check of the aggregator return type / merger type against the non-decayed argument types;
// presumably rejects unsupported signatures -- TODO confirm in CheckAggregate.
2512 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
// An empty columnName becomes an empty column list; otherwise a one-element list is built.
2513 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2514
// Exactly one column is requested for this action.
2515 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2516 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2517
// The accumulator is a copy of aggIdentity; the same shared object is handed both to the
// helper and to the returned result pointer.
2518 auto accObjPtr = std::make_shared<U>(aggIdentity);
2519 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
// NOTE(review): listing lines 2520 and 2523 are not present here, so the definition of
// Action_t and the trailing arguments of the make_unique call below are not visible.
2521 auto action = std::make_unique<Action_t>(
2522 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2524 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2525 }
2526
2527 // clang-format off
2528 ////////////////////////////////////////////////////////////////////////////
2529 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2530 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2531 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2532 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2533 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2534 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2535 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2536 /// \return the result of the aggregation wrapped in a RResultPtr.
2537 ///
2538 /// See previous Aggregate overload for more information.
2539 // clang-format on
2540 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2541 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2542 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2543 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2544 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2545 {
2546 static_assert(
2548 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2549 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2550 }
2551
2552 // clang-format off
2553 ////////////////////////////////////////////////////////////////////////////
2554 /// \brief Book execution of a custom action using a user-defined helper object.
2555 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2556 /// \tparam OtherColumns A list of the types of the other columns used by this action
2557 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2558 /// \param[in] helper The Action Helper to be scheduled.
2559 /// \param[in] columns The names of the columns on which the helper acts.
2560 /// \return the result of the helper wrapped in a RResultPtr.
2561 ///
2562 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2563 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2564 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2565 ///
2566 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2567 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2568 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2569 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2570 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2571 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2572 /// the requested columns for the particular entry being processed.
2573 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2574 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2575 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2576 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations.
2577 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2578 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2579 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2580 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2581 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2582 /// method concurrently, but will always pass different 'slot' numbers.
2583 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2584 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2585 /// before Initialize(), because the RResultPtr is constructed before the event loop is started.
2586 /// * ROOT::RDF::SampleCallback_t GetSampleCallback(): optional. If present, it must return a callable with the
2587 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
2588 /// of every sample, as with DefinePerSample().
2589 ///
2590 /// In case this is called without specifying column types, jitting is used,
2591 /// and the Helper class needs to be known to the interpreter.<br>
2592 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2593 ///
2594 /// ### Examples
2595 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.<br>
2596 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
2597 ///
2598 // clang-format on
2599
// Books a custom action driven by a user-provided helper object (contract described in the
// Doxygen comment above).
// NOTE(review): listing lines 2601 (function declarator), 2605-2606 (head of the static_assert)
// and 2612 (the `if` that opens the branch below) are not present in this listing.
2600 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
2602 {
// HelperT is Helper stripped of references and cv-qualifiers.
2603 using HelperT = std::decay_t<Helper>;
2604 // TODO add more static sanity checks on Helper
2607 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2608
// The helper is forwarded into a shared_ptr-owned HelperT; its result pointer is fetched
// right away, i.e. at booking time.
2609 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
2610 auto resPtr = hPtr->GetResultPtr();
2611
// First branch: try to create the action without input columns.
2613 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2614 } else {
// Second branch: create the action on the given columns, passing their count along.
2615 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2616 fProxiedPtr, columns.size());
2617 }
2618 }
2619
2620 ////////////////////////////////////////////////////////////////////////////
2621 /// \brief Provides a representation of the columns in the dataset.
2622 /// \tparam ColumnTypes variadic list of branch/column types.
2623 /// \param[in] columnList Names of the columns to be displayed.
2624 /// \param[in] nRows Number of events for each column to be displayed.
2625 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2626 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2627 ///
2628 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2629 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
2630 /// return a complete version through `RDisplay::AsString()`.
2631 ///
2632 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
2633 /// RResultPtr.
2634 ///
2635 /// Example usage:
2636 /// ~~~{.cpp}
2637 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2637 /// auto d1 = d.Display("");
2639 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2640 /// auto d2 = d.Display({"x", "y"}, 128);
2641 /// // Printing the short representations, the event loop will run
2642 /// d1->Print();
2643 /// d2->Print();
2644 /// ~~~
2645 template <typename... ColumnTypes>
2646 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2647 {
2648 CheckIMTDisabled("Display");
2649 auto newCols = columnList;
2650 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
2651 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
2652 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
2653 // Need to add ULong64_t type corresponding to the first column rdfentry_
2654 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
2655 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr);
2656 }
2657
2658 ////////////////////////////////////////////////////////////////////////////
2659 /// \brief Provides a representation of the columns in the dataset.
2660 /// \param[in] columnList Names of the columns to be displayed.
2661 /// \param[in] nRows Number of events for each column to be displayed.
2662 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2663 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2664 ///
2665 /// This overload automatically infers the column types.
2666 /// See the previous overloads for further details.
2667 ///
2668 /// Invoked when no types are specified to Display
2669 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2670 {
2671 CheckIMTDisabled("Display");
2672 auto newCols = columnList;
2673 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
2674 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
2675 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
2676 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(
2677 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr,
2678 columnList.size() + 1);
2679 }
2680
2681 ////////////////////////////////////////////////////////////////////////////
2682 /// \brief Provides a representation of the columns in the dataset.
2683 /// \param[in] columnNameRegexp A regular expression to select the columns.
2684 /// \param[in] nRows Number of events for each column to be displayed.
2685 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2686 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2687 ///
2688 /// The existing columns are matched against the regular expression. If the string provided
2689 /// is empty, all columns are selected.
2690 /// See the previous overloads for further details.
2692 Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10)
2693 {
2694 const auto columnNames = GetColumnNames();
2695 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
2696 return Display(selectedColumns, nRows, nMaxCollectionElements);
2697 }
2698
2699 ////////////////////////////////////////////////////////////////////////////
2700 /// \brief Provides a representation of the columns in the dataset.
2701 /// \param[in] columnList Names of the columns to be displayed.
2702 /// \param[in] nRows Number of events for each column to be displayed.
2703 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2704 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2705 ///
2706 /// See the previous overloads for further details.
2708 Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2709 {
2710 ColumnNames_t selectedColumns(columnList);
2711 return Display(selectedColumns, nRows, nMaxCollectionElements);
2712 }
2713
2714private:
// Shared implementation behind the Define-style methods: validates the input columns, works out
// a type name for the new column, creates the RDefine node and returns a new RInterface whose
// column register contains it.
// NOTE(review): several listing lines are not present here (2716 return type, 2720-2722 and
// 2724-2726 branch bodies, 2731/2733 type-alias tails, 2751 and 2753), so parts of the
// statements below are not visible.
2715 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2717 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
2718 {
// `where` names the calling method; a value starting with "Redefine" selects the else branch.
2719 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
2723 } else {
2727 }
2728
// ColTypes_t is derived from the callable's argument types with leading parameters conditionally
// removed (the conditions are on the missing lines).
2729 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2730 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2732 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2734
2735 constexpr auto nColumns = ColTypes_t::list_size;
2736
2737 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2738 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
2739
2740 // Declare return type to the interpreter, for future use by jitted actions
2741 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2742 if (retTypeName.empty()) {
2743 // The type is not known to the interpreter.
2744 // We must not error out here, but rather if/when this column is used in jitted code,
// so we inject a broken but telling type name.
2745 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2746 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2747 }
2748
2749 using NewCol_t = RDFDetail::RDefine<F, DefineType>;
2750 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
2752
// newCols is declared on a line missing from this listing; the new column is added to it and
// it is then moved into the returned interface.
2754 newCols.AddDefine(std::move(newColumn));
2755
2756 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
2757
2758 return newInterface;
2759 }
2760
2761 // This overload is chosen when the callable passed to Define or DefineSlot returns void.
2762 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2763 // this way compilation of `Define` has no way to continue after throwing the error.
// Error-reporting overload: enabled only when F is not convertible to std::string and RetType is
// not default-constructible. With the default RetType the static_assert below then always fails,
// which turns the misuse into a single readable compile-time error.
// NOTE(review): listing line 2768 (the function declarator) is not present in this listing.
2764 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2765 bool IsFStringConv = std::is_convertible<F, std::string>::value,
2766 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2767 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
2769 {
2770 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2771 "Error in `Define`: type returned by expression is not default-constructible");
2772 return *this; // never reached
2773 }
2774
// Implementation of Snapshot for explicitly typed columns: books the Snapshot action and, unless
// options.fLazy is set, accesses the result before returning it.
// NOTE(review): listing lines 2776 (start of the declarator), 2784-2785 and 2794 are not present
// in this listing.
2775 template <typename... ColumnTypes>
2777 const ColumnNames_t &columnList, const RSnapshotOptions &options)
2778 {
// Column names filtered by FilterArraySizeColNames; "Snapshot" is the caller label it receives.
2779 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2780
// The number of template types is checked against the number of remaining columns.
2781 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
2782 // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it.
2783 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
2786
// fullTreeName is split into a directory part and a tree-name part.
2787 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
2788 const auto &treename = parsedTreePath.fTreeName;
2789 const auto &dirname = parsedTreePath.fDirName;
2790
2791 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
2792 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
2793
// The result object is a new RDataFrame built from the tree name and file name, with the written
// columns as its default columns.
2795 auto newRDF =
2796 std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, /*defaultColumns=*/columnListWithoutSizeColumns);
2797
2798 // The Snapshot helper will use validCols (with aliases resolved) as input columns, and
2799 // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column names.
2800 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs,
2801 fProxiedPtr);
2802
// Non-lazy snapshot: dereference the result pointer; presumably this is what triggers the
// event loop right away -- TODO confirm against RResultPtr.
2803 if (!options.fLazy)
2804 *resPtr;
2805 return resPtr;
2806 }
2807
2808 ////////////////////////////////////////////////////////////////////////////
2809 /// \brief Implementation of cache.
// Builds an RLazyDS-backed RInterface from the results of one Take action per requested column.
// The index pack S pairs each entry of ColTypes with the column name at the same position.
2810 template <typename... ColTypes, std::size_t... S>
2811 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
2812 {
// NOTE(review): the caller label passed here is "Snapshot" although this is the Cache
// implementation; if FilterArraySizeColNames uses the label in user-facing diagnostics they
// would name the wrong method -- confirm and consider passing "Cache".
2813 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2814
2815 // Check at compile time that the column types are copy constructible
// NOTE(review): listing line 2817 (the initializer of areCopyConstructible) is not present here.
2816 constexpr bool areCopyConstructible =
2818 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2819
2820 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
2821
// One Take<ColType> result per column, then (column name, Take result) pairs feed the data source.
2822 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
2823 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
2824 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
2825
// The returned interface sits on a new RLoopManager that takes ownership of the data source.
2826 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
2827
2828 return cachedRDF;
2829 }
2830
// Implementation of Vary for a compiled callable: validates the inputs, creates an RVariation
// node and returns a new RInterface whose column register contains it.
// NOTE(review): listing lines 2832 (return type) and 2858 (declaration of newCols) are not
// present in this listing.
2831 template <bool IsSingleColumn, typename F>
2833 VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
2834 const std::vector<std::string> &variationTags, std::string_view variationName)
2835 {
// The input column types and their count come from the argument list of the decayed callable.
2836 using F_t = std::decay_t<F>;
2837 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
2838 using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
2839 constexpr auto nColumns = ColTypes_t::list_size;
2840
2841 SanityChecksForVary<RetType>(colNames, variationTags, variationName);
2842
2843 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
2844 CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
2845
2846 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2847 if (retTypeName.empty()) {
2848 // The type is not known to the interpreter, but we don't want to error out
2849 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
2850 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2851 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2852 }
2853
2854 auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>(
2855 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
2856 validColumnNames);
2857
// The variation is added to newCols, which is then moved into the returned interface.
2859 newCols.AddVariation(std::move(variation));
2860
2861 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
2862
2863 return newInterface;
2864 }
2865
// Implementation of Vary for a string expression: checks the names, books a jitted variation
// and returns a new RInterface whose column register contains it.
// Throws std::logic_error when the same column name appears more than once in colNames.
// NOTE(review): listing lines 2876-2877 (rest of the loop body) and 2893 (declaration of
// newColRegister) are not present in this listing.
2866 RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression,
2867 const std::vector<std::string> &variationTags,
2868 std::string_view variationName, bool isSingleColumn)
2869 {
// Preconditions: at least one tag, at least one column, and a non-empty variation name.
2870 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
2871 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
2872 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
2873
// Each column name and the variation name are passed through CheckValidCppVarName.
2874 for (auto &colName : colNames) {
2875 RDFInternal::CheckValidCppVarName(colName, "Vary");
2878 }
2879 RDFInternal::CheckValidCppVarName(variationName, "Vary");
2880
2881 // when varying multiple columns, they must be different columns
2882 if (colNames.size() > 1) {
// A std::set drops duplicates, so a size mismatch means some name was repeated.
2883 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
2884 if (uniqueCols.size() != colNames.size())
2885 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
2886 }
2887
// The upcast node is handed to BookVariationJit together with the current column register,
// the data source and the branch names.
2888 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2889 auto jittedVariation =
2890 RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, fDataSource,
2891 fColRegister, fLoopManager->GetBranchNames(), upcastNodeOnHeap, isSingleColumn);
2892
2894 newColRegister.AddVariation(std::move(jittedVariation));
2895
2896 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister));
2897
2898 return newInterface;
2899 }
2900
2901 template <typename Helper, typename ActionResultType>
2902 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
2903 const std::shared_ptr<Helper> &hPtr,
2905 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
2906 {
2907 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u);
2908 }
2909
2910 template <typename Helper, typename ActionResultType, typename... Others>
2911 RResultPtr<ActionResultType>
2912 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
2913 const std::shared_ptr<Helper>& /*hPtr*/,
2914 Others...)
2915 {
2916 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
2917 "columns! The action helper type was ") +
2918 typeid(Helper).name());
2919 return {};
2920 }
2921
2922protected:
// Protected constructor: forwards the loop manager and the column register to RInterfaceBase and
// stores a copy of the shared pointer to the proxied node.
2923 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
2924 const RDFInternal::RColumnRegister &colRegister)
2925 : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied)
2926 {
2927 }
2928
// Read-only access to the shared pointer to the proxied node (returned by const reference, no copy).
2929 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2930};
2931
2932} // namespace RDF
2933
2934} // namespace ROOT
2935
2936#endif // ROOT_RDF_TINTERFACE
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
unsigned int UInt_t
Definition: RtypesCore.h:46
unsigned long long ULong64_t
Definition: RtypesCore.h:81
#define R__ASSERT(e)
Definition: TError.h:118
constexpr Int_t kError
Definition: TError.h:46
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition: TGX11.cxx:110
Base class for action helpers, see RInterface::Book() for more information.
Definition: RActionImpl.hxx:26
The head node of a RDF computation graph.
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void Run(bool jit=true)
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
unsigned int GetNSlots() const
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
Definition: RAction.hxx:54
A binder for user-defined columns, variations and aliases.
void AddVariation(std::shared_ptr< RVariationBase > variation)
Register a new systematic variation.
void AddDefine(std::shared_ptr< RDFDetail::RDefineBase > column)
Add a new defined column.
ColumnNames_t GetNames() const
Return the list of the names of the defined columns (Defines + Aliases).
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
RDFDetail::RLoopManager * fLoopManager
< The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which all column typ...
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
void CheckIMTDisabled(std::string_view callerName)
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:103
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing columns using auto-generated variation tags.
Definition: RInterface.hxx:685
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:778
RResultPtr<::TGraph > Graph(std::string_view x="", std::string_view y="")
Fill and return a TGraph object (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
Definition: RInterface.hxx:358
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
Definition: RInterface.hxx:557
RInterface & operator=(RInterface &&)=default
Move-assignment operator for RInterface.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns using auto-generated tags.
Definition: RInterface.hxx:728
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:760
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
Definition: RInterface.hxx:329
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:506
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns.
Definition: RInterface.hxx:806
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< RLoopManager > &proxied)
Build an RInterface from an RLoopManager.
Definition: RInterface.hxx:139
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:246
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
Definition: RInterface.hxx:618
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow referring to a column by a different name.
Definition: RInterface.hxx:852
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:448
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
RInterface< Proxied, DS_t > VaryImpl(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
Definition: RInterface.hxx:109
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:670
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for one or more existing columns.
Definition: RInterface.hxx:834
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
Definition: RInterface.hxx:416
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:116
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register a systematic variation that affects multiple columns simultaneously.
Definition: RInterface.hxx:716
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:486
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RInterface< Proxied, DS_t > JittedVaryImpl(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName, bool isSingleColumn)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:937
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:467
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr<::TGraphAsymmErrors > GraphAsymmErrors(std::string_view x="", std::string_view y="", std::string_view exl="", std::string_view exh="", std::string_view eyl="", std::string_view eyh="")
Fill and return a TGraphAsymmErrors object (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
Definition: RInterface.hxx:955
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:388
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:276
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:207
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:230
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
An RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:88
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree,...
Definition: RDataFrame.hxx:40
TDirectory::TContext keeps track of and restores the current directory.
Definition: TDirectory.h:89
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
basic_string_view< char > string_view
#define F(x, y, z)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create an RResultPtr and set its pointer to the corresponding RAction. This overload is invoked by non-...
Definition: RResultPtr.hxx:409
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void RemoveDuplicates(ColumnNames_t &columnNames)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: RDFUtils.cxx:99
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string PrettyPrintAddr(const void *const addr)
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(const std::vector< std::string > &branches, TTree *tree, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
std::vector< std::string > GetTopLevelBranchNames(TTree &t)
Get all the top-level branches names, including the ones of the friend trees.
double T(double x)
Definition: ChebyshevPol.h:34
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
std::vector< std::string > ColumnNames_t
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:527
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:558
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:544
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1202
RooArgSet S(Args_t &&... args)
Definition: RooArgSet.h:240
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
Definition: graph.py:1
Definition: tree.py:1
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition: Utils.hxx:139
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:30
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:48
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:73
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a THnD.
std::shared_ptr<::THnD > GetHistogram() const
A struct which stores the parameters of a TProfile.
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25