Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/RDataSource.hxx"
20#include "ROOT/RDF/RDefine.hxx"
22#include "ROOT/RDF/RFilter.hxx"
27#include "ROOT/RDF/RRange.hxx"
29#include "ROOT/RDF/Utils.hxx"
32#include "ROOT/RResultPtr.hxx"
34#include <string_view>
35#include "ROOT/RVec.hxx"
36#include "ROOT/TypeTraits.hxx"
37#include "RtypesCore.h" // for ULong64_t
38#include "TDirectory.h"
39#include "TH1.h" // For Histo actions
40#include "TH2.h" // For Histo actions
41#include "TH3.h" // For Histo actions
42#include "THn.h"
43#include "THnSparse.h"
44#include "TProfile.h"
45#include "TProfile2D.h"
46#include "TStatistic.h"
47
48// TODO: Needed to show the info message in Snapshot, remove in 6.40
49#include "ROOT/RLogger.hxx"
50#include "ROOT/RVersion.hxx"
51#include "TEnv.h"
52#include <cstdlib>
53#include <cstring>
54
55#include <algorithm>
56#include <cstddef>
57#include <initializer_list>
58#include <iterator> // std::back_inserter
59#include <limits>
60#include <memory>
61#include <set>
62#include <sstream>
63#include <stdexcept>
64#include <string>
65#include <type_traits> // is_same, enable_if
66#include <typeinfo>
67#include <unordered_set>
68#include <utility> // std::index_sequence
69#include <vector>
70#include <any>
71
72class TGraph;
73
74// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
75namespace ROOT {
79class RDataFrame;
80} // namespace ROOT
81namespace cling {
82std::string printValue(ROOT::RDataFrame *tdf);
83}
84
85namespace ROOT {
86namespace RDF {
89namespace TTraits = ROOT::TypeTraits;
90
91template <typename Proxied, typename DataSource>
92class RInterface;
93
95} // namespace RDF
96
97namespace Internal {
98namespace RDF {
100void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
101void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end);
104std::string GetDataSourceLabel(const ROOT::RDF::RNode &node);
105void SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline);
106} // namespace RDF
107} // namespace Internal
108
109namespace RDF {
110
111// clang-format off
112/**
113 * \class ROOT::RDF::RInterface
114 * \ingroup dataframe
115 * \brief The public interface to the RDataFrame federation of classes.
116 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
117 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
118 *
119 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
120 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
121 */
122// clang-format on
123template <typename Proxied, typename DataSource = void>
129 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
131
132 template <typename T, typename W>
133 friend class RInterface;
134
136 friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
137 friend void RDFInternal::ChangeBeginAndEndEntries(const RNode &node, Long64_t start, Long64_t end);
139 friend std::string ROOT::Internal::RDF::GetDataSourceLabel(const RNode &node);
141 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
142
143public:
144 ////////////////////////////////////////////////////////////////////////////
145 /// \brief Copy-assignment operator for RInterface.
146 RInterface &operator=(const RInterface &) = default;
147
148 ////////////////////////////////////////////////////////////////////////////
149 /// \brief Copy-ctor for RInterface.
150 RInterface(const RInterface &) = default;
151
152 ////////////////////////////////////////////////////////////////////////////
153 /// \brief Move-ctor for RInterface.
154 RInterface(RInterface &&) = default;
155
156 ////////////////////////////////////////////////////////////////////////////
157 /// \brief Move-assignment operator for RInterface.
159
160 ////////////////////////////////////////////////////////////////////////////
161 /// \brief Build a RInterface from a RLoopManager.
162 /// This constructor is only available for RInterface<RLoopManager>.
164 RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied)
165 {
166 }
167
168 ////////////////////////////////////////////////////////////////////////////
169 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
170 /// Different RDataFrame methods return different C++ types. All nodes, however,
171 /// can be cast to this common type at the cost of a small performance penalty.
172 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
173 /// around via (non-template, C++11) helper functions.
174 /// Example usage:
175 /// ~~~{.cpp}
176 /// // a function that conditionally adds a Range to a RDataFrame node.
177 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
178 /// {
179 /// return mustAddRange ? df.Range(1) : df;
180 /// }
181 /// // use as :
182 /// ROOT::RDataFrame df(10);
183 /// auto maybeRanged = MaybeAddRange(df, true);
184 /// ~~~
185 /// Note that it is not a problem to pass RNode's by value.
186 operator RNode() const
187 {
// Build the RNode from this node's proxied pointer, cast to the RNodeBase type, and hand over
// the same loop manager and column register that this interface holds.
188 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister);
189 }
190
191 ////////////////////////////////////////////////////////////////////////////
192 /// \brief Append a filter to the call graph.
193 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
194 /// signalling whether the event has passed the selection (true) or not (false).
195 /// \param[in] columns Names of the columns/branches in input to the filter function.
196 /// \param[in] name Optional name of this filter. See `Report`.
197 /// \return the filter node of the computation graph.
198 ///
199 /// Append a filter node at the point of the call graph corresponding to the
200 /// object this method is called on.
201 /// The callable `f` should not have side-effects (e.g. modification of an
202 /// external or static variable) to ensure correct results when implicit
203 /// multi-threading is active.
204 ///
205 /// RDataFrame only evaluates filters when necessary: if multiple filters
206 /// are chained one after another, they are executed in order and the first
207 /// one returning false causes the event to be discarded.
208 /// Even if multiple actions or transformations depend on the same filter,
209 /// it is executed once per entry. If its result is requested more than
210 /// once, the cached result is served.
211 ///
212 /// ### Example usage:
213 /// ~~~{.cpp}
214 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
215 /// auto filtered = df.Filter(myCut, {"x", "y"});
216 ///
217 /// // String: it must contain valid C++ except that column names can be used instead of variable names
218 /// auto filtered = df.Filter("x*y > 0");
219 /// ~~~
220 ///
221 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
222 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
223 /// ~~~{.cpp}
224 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
225 /// ~~~
226 /// but instead this will:
227 /// ~~~{.cpp}
228 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
229 /// ~~~
232 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
233 {
234 RDFInternal::CheckFilter(f);
235 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
236 constexpr auto nColumns = ColTypes_t::list_size;
239
241
242 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
244 }
245
246 ////////////////////////////////////////////////////////////////////////////
247 /// \brief Append a filter to the call graph.
248 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
249 /// signalling whether the event has passed the selection (true) or not (false).
250 /// \param[in] name Optional name of this filter. See `Report`.
251 /// \return the filter node of the computation graph.
252 ///
253 /// Refer to the first overload of this method for the full documentation.
256 {
257 // The sfinae is there in order to pick up the overloaded method which accepts two strings
258 // rather than this template method.
259 return Filter(f, {}, name);
260 }
261
262 ////////////////////////////////////////////////////////////////////////////
263 /// \brief Append a filter to the call graph.
264 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
265 /// signalling whether the event has passed the selection (true) or not (false).
266 /// \param[in] columns Names of the columns/branches in input to the filter function.
267 /// \return the filter node of the computation graph.
268 ///
269 /// Refer to the first overload of this method for the full documentation.
270 template <typename F>
271 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
272 {
// Convert the initializer list into a ColumnNames_t and forward to the Filter overload that takes
// column names; no filter name is passed, so that overload's default name applies.
// NOTE(review): `f` is passed on as an lvalue here, i.e. it is copied rather than moved.
273 return Filter(f, ColumnNames_t{columns});
274 }
275
276 ////////////////////////////////////////////////////////////////////////////
277 /// \brief Append a filter to the call graph.
278 /// \param[in] expression The filter expression in C++
279 /// \param[in] name Optional name of this filter. See `Report`.
280 /// \return the filter node of the computation graph.
281 ///
282 /// The expression is just-in-time compiled and used to filter entries. It must
283 /// be valid C++ syntax in which variable names are substituted with the names
284 /// of branches/columns.
285 ///
286 /// ### Example usage:
287 /// ~~~{.cpp}
288 /// auto filtered_df = df.Filter("myCollection.size() > 3");
289 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
290 /// ~~~
291 ///
292 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
293 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
294 /// ~~~{.cpp}
295 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
296 /// ~~~
297 /// but instead this will:
298 /// ~~~{.cpp}
299 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
300 /// ~~~
301 RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
302 {
303 // deleted by the jitted call to JitFilterHelper
304 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
305 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
307 const auto jittedFilter =
309
311 }
312
313 ////////////////////////////////////////////////////////////////////////////
314 /// \brief Discard entries with missing values
315 /// \param[in] column Column name whose entries with missing values should be discarded
316 /// \return The filter node of the computation graph
317 ///
318 /// This operation is useful in case an entry of the dataset is incomplete,
319 /// i.e. if one or more of the columns do not have valid values. If the value
320 /// of the input column is missing for an entry, the entire entry will be
321 /// discarded from the rest of this branch of the computation graph.
322 ///
323 /// Use cases include:
324 /// * When processing multiple files, one or more of them is missing a column
325 /// * In horizontal joining with entry matching, a certain dataset has no
326 /// match for the current entry.
327 ///
328 /// ### Example usage:
329 ///
330 /// \code{.py}
331 /// # Assume a dataset with columns [idx, x] matching another dataset with
332 /// # columns [idx, y]. For idx == 42, the right-hand dataset has no match
333 /// df = ROOT.RDataFrame(dataset)
334 /// df_nomissing = df.FilterAvailable("idx").Define("z", "x + y")
335 /// colz = df_nomissing.Take[int]("z")
336 /// \endcode
337 ///
338 /// \code{.cpp}
339 /// // Assume a dataset with columns [idx, x] matching another dataset with
340 /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
341 /// ROOT::RDataFrame df{dataset};
342 /// auto df_nomissing = df.FilterAvailable("idx")
343 /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
344 /// auto colz = df_nomissing.Take<int>("z");
345 /// \endcode
346 ///
347 /// \note See FilterMissing() if you want to keep only the entries with
348 /// missing values instead.
350 {
351 const auto columns = ColumnNames_t{column.data()};
352 // For now disable this functionality in case of an empty data source and
353 // the column name was not defined previously.
354 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
355 throw std::runtime_error("Unknown column: \"" + std::string(column) + "\"");
357 auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ true, fProxiedPtr, fColRegister, columns);
360 }
361
362 ////////////////////////////////////////////////////////////////////////////
363 /// \brief Keep only the entries that have missing values.
364 /// \param[in] column Column name whose entries with missing values should be kept
365 /// \return The filter node of the computation graph
366 ///
367 /// This operation is useful in case an entry of the dataset is incomplete,
368 /// i.e. if one or more of the columns do not have valid values. It only
369 /// keeps the entries for which the value of the input column is missing.
370 ///
371 /// Use cases include:
372 /// * When processing multiple files, one or more of them is missing a column
373 /// * In horizontal joining with entry matching, a certain dataset has no
374 /// match for the current entry.
375 ///
376 /// ### Example usage:
377 ///
378 /// \code{.py}
379 /// # Assume a dataset made of two files vertically chained together, one has
380 /// # column "x" and the other has column "y"
381 /// df = ROOT.RDataFrame(dataset)
382 /// df_valid_col_x = df.FilterMissing("y")
383 /// df_valid_col_y = df.FilterMissing("x")
384 /// display_x = df_valid_col_x.Display(("x",))
385 /// display_y = df_valid_col_y.Display(("y",))
386 /// \endcode
387 ///
388 /// \code{.cpp}
389 /// // Assume a dataset made of two files vertically chained together, one has
390 /// // column "x" and the other has column "y"
391 /// ROOT::RDataFrame df{dataset};
392 /// auto df_valid_col_x = df.FilterMissing("y");
393 /// auto df_valid_col_y = df.FilterMissing("x");
394 /// auto display_x = df_valid_col_x.Display<int>({"x"});
395 /// auto display_y = df_valid_col_y.Display<int>({"y"});
396 /// \endcode
397 ///
398 /// \note See FilterAvailable() if you want to discard the entries in case
399 /// there is a missing value instead.
401 {
402 const auto columns = ColumnNames_t{column.data()};
403 // For now disable this functionality in case of an empty data source and
404 // the column name was not defined previously.
405 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
406 throw std::runtime_error("Unknown column: \"" + std::string(column) + "\"");
408 auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ false, fProxiedPtr, fColRegister, columns);
411 }
412
413 // clang-format off
414 ////////////////////////////////////////////////////////////////////////////
415 /// \brief Define a new column.
416 /// \param[in] name The name of the defined column.
417 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
418 /// \param[in] columns Names of the columns/branches in input to the producer function.
419 /// \return the first node of the computation graph for which the new quantity is defined.
420 ///
421 /// Define a column that will be visible from all subsequent nodes
422 /// of the functional chain. The `expression` is only evaluated for entries that pass
423 /// all the preceding filters.
424 /// A new variable is created called `name`, accessible as if it was contained
425 /// in the dataset from subsequent transformations/actions.
426 ///
427 /// Use cases include:
428 /// * caching the results of complex calculations for easy and efficient multiple access
429 /// * extraction of quantities of interest from complex objects
430 ///
431 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
432 ///
433 /// ### Example usage:
434 /// ~~~{.cpp}
435 /// // assuming a function with signature:
436 /// double myComplexCalculation(const RVec<float> &muon_pts);
437 /// // we can pass it directly to Define
438 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
439 /// // alternatively, we can pass the body of the function as a string, as in Filter:
440 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
441 /// ~~~
442 ///
443 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
444 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
445 /// ~~~{.cpp}
446 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
447 /// ~~~
448 /// but instead this will:
449 /// ~~~{.cpp}
450 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
451 /// ~~~
453 RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
454 {
455 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define");
456 }
457 // clang-format on
458
459 // clang-format off
460 ////////////////////////////////////////////////////////////////////////////
461 /// \brief Define a new column with a value dependent on the processing slot.
462 /// \param[in] name The name of the defined column.
463 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
464 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
465 /// \return the first node of the computation graph for which the new quantity is defined.
466 ///
467 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
468 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
469 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
470 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
471 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
472 /// Note that there is no guarantee as to how often each slot will be reached during the event loop.
473 ///
474 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
475 /// ~~~{.cpp}
476 /// int function(unsigned int, double, double);
477 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
478 /// df.DefineSlot("x", function, {"column1", "column2"})
479 /// ~~~
480 ///
481 /// See Define() for more information.
482 template <typename F>
483 RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
484 {
// Delegate to DefineImpl, selecting the Slot flavour of extra arguments and moving the callable in.
// The trailing string is this method's name; DefineImpl is not visible here, presumably it uses
// the label for diagnostics -- TODO confirm against DefineImpl.
485 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot");
486 }
487 // clang-format on
488
489 // clang-format off
490 ////////////////////////////////////////////////////////////////////////////
491 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
492 /// \param[in] name The name of the defined column.
493 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
494 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
495 /// \return the first node of the computation graph for which the new quantity is defined.
496 ///
497 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
498 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
499 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
500 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
501 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
502 /// Note that there is no guarantee as to how often each slot will be reached during the event loop.
503 /// The second parameter is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
504 ///
505 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
506 /// ~~~{.cpp}
507 /// int function(unsigned int, ULong64_t, double, double);
508 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
509 /// DefineSlotEntry("x", function, {"column1", "column2"})
510 /// ~~~
511 ///
512 /// See Define() for more information.
513 template <typename F>
514 RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
515 {
517 "DefineSlotEntry");
518 }
519 // clang-format on
520
521 ////////////////////////////////////////////////////////////////////////////
522 /// \brief Define a new column.
523 /// \param[in] name The name of the defined column.
524 /// \param[in] expression An expression in C++ which represents the defined value
525 /// \return the first node of the computation graph for which the new quantity is defined.
526 ///
527 /// The expression is just-in-time compiled and used to produce the column entries.
528 /// It must be valid C++ syntax in which variable names are substituted with the names
529 /// of branches/columns.
530 ///
531 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
532 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
533 /// ~~~{.cpp}
534 /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
535 /// ~~~
536 /// but instead this will:
537 /// ~~~{.cpp}
538 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
539 /// ~~~
540 ///
541 /// Refer to the first overload of this method for the full documentation.
542 RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
543 {
544 constexpr auto where = "Define";
546 // these checks must be done before jitting lest we throw exceptions in jitted code
549
550 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
551 auto jittedDefine =
553
555 newCols.AddDefine(std::move(jittedDefine));
556
558
559 return newInterface;
560 }
561
562 ////////////////////////////////////////////////////////////////////////////
563 /// \brief Overwrite the value and/or type of an existing column.
564 /// \param[in] name The name of the column to redefine.
565 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
566 /// \param[in] columns Names of the columns/branches in input to the expression.
567 /// \return the first node of the computation graph for which the quantity is redefined.
568 ///
569 /// The old value of the column can be used as an input for the expression.
570 ///
571 /// An exception is thrown in case the column to redefine does not already exist.
572 /// See Define() for more information.
574 RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {})
575 {
576 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine");
577 }
578
579 // clang-format off
580 ////////////////////////////////////////////////////////////////////////////
581 /// \brief Overwrite the value and/or type of an existing column.
582 /// \param[in] name The name of the column to redefine.
583 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
584 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
585 /// \return the first node of the computation graph for which the new quantity is defined.
586 ///
587 /// The old value of the column can be used as an input for the expression.
588 /// An exception is thrown in case the column to redefine does not already exist.
589 ///
590 /// See DefineSlot() for more information.
591 // clang-format on
592 template <typename F>
593 RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
594 {
// Same delegation as DefineSlot (Slot flavour of extra arguments, callable moved in), but with the
// "RedefineSlot" label. DefineImpl is not visible here; presumably the label is what makes it
// apply redefinition rules (the column must already exist) -- TODO confirm against DefineImpl.
595 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot");
596 }
597
598 // clang-format off
599 ////////////////////////////////////////////////////////////////////////////
600 /// \brief Overwrite the value and/or type of an existing column.
601 /// \param[in] name The name of the column to redefine.
602 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
603 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
604 /// \return the first node of the computation graph for which the new quantity is defined.
605 ///
606 /// The old value of the column can be used as an input for the expression.
607 /// An exception is thrown in case the column to redefine does not already exist.
608 ///
609 /// See DefineSlotEntry() for more information.
610 // clang-format on
611 template <typename F>
612 RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
613 {
615 "RedefineSlotEntry");
616 }
617
618 ////////////////////////////////////////////////////////////////////////////
619 /// \brief Overwrite the value and/or type of an existing column.
620 /// \param[in] name The name of the column to redefine.
621 /// \param[in] expression An expression in C++ which represents the defined value
622 /// \return the first node of the computation graph for which the new quantity is defined.
623 ///
624 /// The expression is just-in-time compiled and used to produce the column entries.
625 /// It must be valid C++ syntax in which variable names are substituted with the names
626 /// of branches/columns.
627 ///
628 /// The old value of the column can be used as an input for the expression.
629 /// An exception is thrown in case the column to redefine does not already exist.
630 ///
631 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
651
652 ////////////////////////////////////////////////////////////////////////////
653 /// \brief In case the value in the given column is missing, provide a default value
654 /// \tparam T The type of the column
655 /// \param[in] column Column name where missing values should be replaced by the given default value
656 /// \param[in] defaultValue Value to provide instead of a missing value
657 /// \return The node of the graph that will provide a default value
658 ///
659 /// This operation is useful in case an entry of the dataset is incomplete,
660 /// i.e. if one or more of the columns do not have valid values. It does not
661 /// modify the values of the column, but in case any entry is missing, it
662 /// will provide the default value to downstream nodes instead.
663 ///
664 /// Use cases include:
665 /// * When processing multiple files, one or more of them is missing a column
666 /// * In horizontal joining with entry matching, a certain dataset has no
667 /// match for the current entry.
668 ///
669 /// ### Example usage:
670 ///
671 /// \code{.cpp}
672 /// // Assume a dataset with columns [idx, x] matching another dataset with
673 /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
674 /// ROOT::RDataFrame df{dataset};
675 /// auto df_default = df.DefaultValueFor("y", 33)
676 /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
677 /// auto colz = df_default.Take<int>("z");
678 /// \endcode
679 ///
680 /// \code{.py}
681 /// df = ROOT.RDataFrame(dataset)
682 /// df_default = df.DefaultValueFor("y", 33).Define("z", "x + y")
683 /// colz = df_default.Take[int]("z")
684 /// \endcode
685 template <typename T>
686 RInterface<Proxied, DS_t> DefaultValueFor(std::string_view column, const T &defaultValue)
687 {
688 constexpr auto where{"DefaultValueFor"};
690 // For now disable this functionality in case of an empty data source and
691 // the column name was not defined previously.
692 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
695
696 // Declare return type to the interpreter, for future use by jitted actions
698 if (retTypeName.empty()) {
699 // The type is not known to the interpreter.
700 // We must not error out here: record a placeholder type name instead, to be dealt with if/when this column is used in jitted code.
701 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(T));
702 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
703 }
704
705 const auto validColumnNames = ColumnNames_t{column.data()};
706 auto newColumn = std::make_shared<ROOT::Internal::RDF::RDefaultValueFor<T>>(
707 column, retTypeName, defaultValue, validColumnNames, fColRegister, *fLoopManager);
709
711 newCols.AddDefine(std::move(newColumn));
712
714
715 return newInterface;
716 }
717
718 // clang-format off
719 ////////////////////////////////////////////////////////////////////////////
720 /// \brief Define a new column that is updated when the input sample changes.
721 /// \param[in] name The name of the defined column.
722 /// \param[in] expression A C++ callable that computes the new value of the defined column.
723 /// \return the first node of the computation graph for which the new quantity is defined.
724 ///
725 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
726 /// where:
727 /// - `T` is the type of the defined column
728 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
729 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
730 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
731 /// being processed (see the class docs for more information).
732 ///
733 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
734 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
735 /// starts rather than at every entry.
736 ///
737 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
738 ///
739 /// ### Example usage:
740 /// ~~~{.cpp}
741 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
742 /// df.DefinePerSample("weightbysample",
743 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
744 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
745 /// ~~~
746 // clang-format on
747 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
749 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression)
750 {
    // NOTE(review): the template header, the definition of `RetType_t` and the declarations of `newCols` and
    // `newInterface` are not visible in this listing; the comments below only cover visible lines.
    // The new column name is validated as a C++ variable name first.
751 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
754
    // Look up the type name of the callable's result from its typeid.
755 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
756 if (retTypeName.empty()) {
757 // The type is not known to the interpreter.
758 // We must not error out here, but if/when this column is used in jitted code
    // (here we only record a placeholder type name built from the demangled typeid)
759 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
760 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
761 }
762
    // The callable is moved into the per-sample define node, which is then registered through AddDefine().
763 auto newColumn =
764 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
765
767 newCols.AddDefine(std::move(newColumn));
769 return newInterface;
770 }
771
772 // clang-format off
773 ////////////////////////////////////////////////////////////////////////////
774 /// \brief Define a new column that is updated when the input sample changes.
775 /// \param[in] name The name of the defined column.
776 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
777 /// \return the first node of the computation graph for which the new quantity is defined.
778 ///
779 /// The expression is just-in-time compiled and used to produce the column entries.
780 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
781 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
782 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
783 ///
784 /// ### Example usage:
785 /// ~~~{.py}
786 /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
787 /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
788 /// ~~~
789 ///
790 /// \note
791 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
792 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
793 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
794 /// ~~~{.py}
795 /// ROOT.gInterpreter.Declare(
796 /// """
797 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
798 /// return id.Contains("sample1") ? 1.0f : 2.0f;
799 /// }
800 /// """)
801 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
802 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
803 /// ~~~
804 ///
805 /// \note
806 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
807 /// column names other than those mentioned above: the expression is evaluated once before the processing of the
808 /// sample even starts, so column values are not accessible.
809 // clang-format on
810 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression)
811 {
    // String-expression overload of DefinePerSample.
    // NOTE(review): the checks announced by the comment below, the initializer of `jittedDefine` and the
    // declarations of `newCols` and `newInterface` are not visible in this listing.
812 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
813 // these checks must be done before jitting lest we throw exceptions in jitted code
816
    // Upcast copy of the proxied node placed on the heap; its consumer is not visible here
    // (presumably the jitting helper that creates `jittedDefine` -- TODO confirm).
817 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
818 auto jittedDefine =
820
    // Register the jitted define among the columns of the returned interface.
822 newCols.AddDefine(std::move(jittedDefine));
823
825
826 return newInterface;
827 }
828
829 /// \brief Register systematic variations for a single existing column using custom variation tags.
830 /// \param[in] colName name of the column for which varied values are provided.
831 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
832 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
833 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
834 /// \param[in] inputColumns the names of the columns to be passed to the callable.
835 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
836 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. colName is used if none is provided.
837 ///
838 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
839 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
840 /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with
841 /// ROOT::RDF::Experimental::VariationsFor (see the example below).
842 ///
843 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
844 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
845 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
846 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
847 ///
848 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
849 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
850 /// ~~~{.cpp}
851 /// auto nominal_hx =
852 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, {"down", "up"})
853 /// .Filter("pt > k")
854 /// .Define("x", someFunc, {"pt"})
855 /// .Histo1D("x");
856 ///
857 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
858 /// hx["nominal"].Draw();
859 /// hx["pt:down"].Draw("SAME");
860 /// hx["pt:up"].Draw("SAME");
861 /// ~~~
862 /// RDataFrame computes all variations as part of a single loop over the data.
863 /// In particular, this means that I/O and computation of values shared
864 /// among variations only happen once for all variations. Thus, the event loop
865 /// run-time typically scales much better than linearly with the number of
866 /// variations.
867 ///
868 /// RDataFrame lazily computes the varied values required to produce the
869 /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref
870 /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only
871 /// run for the nominal case.
872 ///
873 /// See other overloads for examples when variations are added for multiple existing columns,
874 /// or when the tags are auto-generated instead of being directly defined.
875 template <typename F>
876 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
877 const std::vector<std::string> &variationTags, std::string_view variationName = "")
878 {
    // Wrap the single column name into a one-element list for the shared implementation.
879 std::vector<std::string> colNames{{std::string(colName)}};
    // An empty variation name falls back to the name of the varied column.
880 const std::string theVariationName{variationName.empty() ? colName : variationName};
881
    // VaryImpl<true>: presumably the single-column flavour of the implementation -- TODO confirm.
    // NOTE(review): the remaining arguments of this call are not visible in this listing.
882 return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags,
884 }
885
886 /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
887 /// \param[in] colName name of the column for which varied values are provided.
888 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
889 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
890 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
891 /// \param[in] inputColumns the names of the columns to be passed to the callable.
892 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
893 /// `"1"`, etc.
894 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
895 /// colName is used if none is provided.
896 ///
897 /// This overload of Vary takes an nVariations parameter instead of a list of tag names.
898 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
899 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
900 ///
901 /// Example usage:
902 /// ~~~{.cpp}
903 /// auto nominal_hx =
904 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, 2)
905 /// .Histo1D("pt");
906 ///
907 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
908 /// hx["nominal"].Draw();
909 /// hx["pt:0"].Draw("SAME");
910 /// hx["pt:1"].Draw("SAME");
911 /// ~~~
912 ///
913 /// \note See also This Vary() overload for more information.
914 template <typename F>
915 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
916 std::size_t nVariations, std::string_view variationName = "")
917 {
918 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
919
920 std::vector<std::string> variationTags;
921 variationTags.reserve(nVariations);
922 for (std::size_t i = 0u; i < nVariations; ++i)
923 variationTags.emplace_back(std::to_string(i));
924
925 const std::string theVariationName{variationName.empty() ? colName : variationName};
926
927 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
928 }
929
930 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
931 /// \param[in] colNames set of names of the columns for which varied values are provided.
932 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
933 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
934 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
935 /// \param[in] inputColumns the names of the columns to be passed to the callable.
936 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
937 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`
938 ///
939 /// This overload of Vary takes a list of column names as first argument and
940 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
941 /// affected column. The `variationTags` are defined as `{"down", "up"}`.
942 ///
943 /// Example usage:
944 /// ~~~{.cpp}
945 /// // produce variations "ptAndEta:down" and "ptAndEta:up"
946 /// auto nominal_hx =
947 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
948 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
949 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
950 /// {"down", "up"}, // variation tags
951 /// "ptAndEta") // variation name
952 /// .Histo1D("pt", "eta");
953 ///
954 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
955 /// hx["nominal"].Draw();
956 /// hx["ptAndEta:down"].Draw("SAME");
957 /// hx["ptAndEta:up"].Draw("SAME");
958 /// ~~~
959 ///
960 /// \note See also This Vary() overload for more information.
961
962 template <typename F>
964 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
965 const std::vector<std::string> &variationTags, std::string_view variationName)
966 {
    // Multi-column overload: all arguments are forwarded unchanged to VaryImpl<false>
    // (presumably the multi-column flavour of the implementation -- TODO confirm).
    // NOTE(review): the line carrying the return type is not visible in this listing.
967 return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName);
968 }
969
970 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
971 /// \param[in] colNames set of names of the columns for which varied values are provided.
972 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
973 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
974 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
975 /// \param[in] inputColumns the names of the columns to be passed to the callable.
976 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
977 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
978 /// This overload has no default for it, so it must always be provided.
979 ///
980 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
981 /// is avoided.
982 ///
983 /// \note See also This Vary() overload for more information.
984 template <typename F>
986 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
987 const std::vector<std::string> &variationTags, std::string_view variationName)
988 {
    // Copy the initializer list into a std::vector<std::string> and delegate to a Vary overload with it;
    // every other argument is passed through unchanged.
    // NOTE(review): the line carrying the return type is not visible in this listing.
989 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName);
990 }
991
992 /// \brief Register systematic variations for multiple existing columns using auto-generated tags.
993 /// \param[in] colNames set of names of the columns for which varied values are provided.
994 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
995 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
996 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
997 /// \param[in] inputColumns the names of the columns to be passed to the callable.
998 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
999 /// `"1"`, etc.
1000 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1001 /// This overload has no default for it, so it must always be provided.
1002 ///
1003 /// This overload of Vary takes a list of column names as first argument.
1004 /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names
1005 /// will be auto-generated as the sequence 0...``nVariations-1``.
1006 ///
1007 /// Example usage:
1008 /// ~~~{.cpp}
1009 /// auto nominal_hx =
1010 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
1011 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
1012 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied
1013 /// 2, // auto-generated variation tags
1014 /// "ptAndEta") // variation name
1015 /// .Histo1D("pt", "eta");
1016 ///
1017 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1018 /// hx["nominal"].Draw();
1019 /// hx["ptAndEta:0"].Draw("SAME");
1020 /// hx["ptAndEta:1"].Draw("SAME");
1021 /// ~~~
1022 ///
1023 /// \note See also This Vary() overload for more information.
1024 template <typename F>
1026 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
1027 std::size_t nVariations, std::string_view variationName)
1028 {
    // NOTE(review): the line carrying the return type is not visible in this listing.
    // At least one variation is required.
1029 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
1030
    // Auto-generate the tags "0", "1", ..., "nVariations - 1".
1031 std::vector<std::string> variationTags;
1032 variationTags.reserve(nVariations);
1033 for (std::size_t i = 0u; i < nVariations; ++i)
1034 variationTags.emplace_back(std::to_string(i));
1035
    // Delegate to a Vary overload with the generated tags in place of nVariations.
1036 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
1037 }
1038
1039 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1040 /// \param[in] colNames set of names of the columns for which varied values are provided.
1041 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
1042 /// take any column values as input, similarly to what happens during Filter and Define calls. It must
1043 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
1044 /// \param[in] inputColumns the names of the columns to be passed to the callable.
1046 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1047 /// `"1"`, etc.
1048 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1049 /// This overload has no default for it, so it must always be provided.
1050 ///
1051 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1052 /// is avoided.
1053 ///
1054 /// \note See also This Vary() overload for more information.
1055 template <typename F>
1057 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
1058 std::size_t nVariations, std::string_view variationName)
1059 {
    // Copy the initializer list into a std::vector<std::string> and delegate to a Vary overload with it;
    // every other argument is passed through unchanged.
    // NOTE(review): the line carrying the return type is not visible in this listing.
1060 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName);
1061 }
1062
1063 /// \brief Register systematic variations for a single existing column using custom variation tags.
1064 /// \param[in] colName name of the column for which varied values are provided.
1065 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1066 /// values for the specified column.
1067 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1068 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1069 /// colName is used if none is provided.
1070 ///
1071 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1072 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1073 /// defined as `{"down", "up"}`.
1074 /// ~~~{.cpp}
1075 /// auto nominal_hx =
1076 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
1077 /// .Filter("pt > k")
1078 /// .Define("x", someFunc, {"pt"})
1079 /// .Histo1D("x");
1080 ///
1081 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1082 /// hx["nominal"].Draw();
1083 /// hx["pt:down"].Draw("SAME");
1084 /// hx["pt:up"].Draw("SAME");
1085 /// ~~~
1086 ///
1087 /// \note See also This Vary() overload for more information.
1088 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression,
1089 const std::vector<std::string> &variationTags, std::string_view variationName = "")
1090 {
1091 std::vector<std::string> colNames{{std::string(colName)}};
1092 const std::string theVariationName{variationName.empty() ? colName : variationName};
1093
1094 return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true);
1095 }
1096
1097 /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
1098 /// \param[in] colName name of the column for which varied values are provided.
1099 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1100 /// values for the specified column.
1101 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1102 /// `"1"`, etc.
1103 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1104 /// colName is used if none is provided.
1105 ///
1106 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1107 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1108 /// auto-generated.
1109 /// ~~~{.cpp}
1110 /// auto nominal_hx =
1111 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2)
1112 /// .Histo1D("pt");
1113 ///
1114 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1115 /// hx["nominal"].Draw();
1116 /// hx["pt:0"].Draw("SAME");
1117 /// hx["pt:1"].Draw("SAME");
1118 /// ~~~
1119 ///
1120 /// \note See also This Vary() overload for more information.
1121 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
1122 std::string_view variationName = "")
1123 {
1124 std::vector<std::string> variationTags;
1125 variationTags.reserve(nVariations);
1126 for (std::size_t i = 0u; i < nVariations; ++i)
1127 variationTags.emplace_back(std::to_string(i));
1128
1129 return Vary(colName, expression, std::move(variationTags), variationName);
1130 }
1131
1132 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1133 /// \param[in] colNames set of names of the columns for which varied values are provided.
1134 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
1135 /// values for the specified columns.
1136 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1137 /// `"1"`, etc.
1138 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1139 ///
1140 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1141 /// compiled. It takes an nVariations parameter instead of a list of tag names.
1142 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
1143 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
1144 /// The example below shows how Vary() is used while dealing with multiple columns.
1145 ///
1146 /// ~~~{.cpp}
1147 /// auto nominal_hx =
1148 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
1149 /// .Histo1D("x", "y");
1150 ///
1151 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1152 /// hx["nominal"].Draw();
1153 /// hx["xy:0"].Draw("SAME");
1154 /// hx["xy:1"].Draw("SAME");
1155 /// ~~~
1156 ///
1157 /// \note See also This Vary() overload for more information.
1158 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1159 std::size_t nVariations, std::string_view variationName)
1160 {
1161 std::vector<std::string> variationTags;
1162 variationTags.reserve(nVariations);
1163 for (std::size_t i = 0u; i < nVariations; ++i)
1164 variationTags.emplace_back(std::to_string(i));
1165
1166 return Vary(colNames, expression, std::move(variationTags), variationName);
1167 }
1168
1169 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1170 /// \param[in] colNames set of names of the columns for which varied values are provided.
1171 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1172 /// values for the specified column.
1173 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1174 /// `"1"`, etc.
1175 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1176 /// This overload has no default for it, so it must always be provided.
1177 ///
1178 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1179 /// is avoided.
1180 ///
1181 /// \note See also This Vary() overload for more information.
1182 RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression,
1183 std::size_t nVariations, std::string_view variationName)
1184 {
1185 return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName);
1186 }
1187
1188 /// \brief Register systematic variations for multiple existing columns using custom variation tags.
1189 /// \param[in] colNames set of names of the columns for which varied values are provided.
1190 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
1191 /// values for the specified columns.
1192 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1193 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1194 ///
1195 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1196 /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as
1197 /// `{"down", "up"}`.
1198 /// ~~~{.cpp}
1199 /// auto nominal_hx =
1200 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
1201 /// .Histo1D("x", "y");
1202 ///
1203 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1204 /// hx["nominal"].Draw();
1205 /// hx["xy:down"].Draw("SAME");
1206 /// hx["xy:up"].Draw("SAME");
1207 /// ~~~
1208 ///
1209 /// \note See also This Vary() overload for more information.
1210 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1211 const std::vector<std::string> &variationTags, std::string_view variationName)
1212 {
1213 return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false);
1214 }
1215
1216 ////////////////////////////////////////////////////////////////////////////
1217 /// \brief Allow to refer to a column with a different name.
1218 /// \param[in] alias name of the column alias
1219 /// \param[in] columnName name of the column to be aliased
1220 /// \return the first node of the computation graph for which the alias is available.
1221 ///
1222 /// Aliasing an alias is supported.
1223 ///
1224 /// ### Example usage:
1225 /// ~~~{.cpp}
1226 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
1227 /// ~~~
1228 RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
1229 {
    // NOTE(review): the helper and the checks announced by the comments below, as well as the declarations of
    // `newCols` and `newInterface`, are not visible in this listing.
1230 // The symmetry with Define is clear. We want to:
1231 // - Create globally the alias and return this very node, unchanged
1232 // - Make aliases accessible based on chains and not globally
1233
1234 // Helper to find out if a name is a column
1236
    // Name of this operation; presumably used by the checks that are not visible here -- TODO confirm.
1237 constexpr auto where = "Alias";
1239 // If the alias name is a column name, there is a problem
1241
    // Validate the aliased column name (a one-element list) and keep the single resulting name.
1242 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
1243
    // Record the alias -> column mapping among the columns of the returned interface.
1245 newCols.AddAlias(alias, validColumnName);
1246
1248
1249 return newInterface;
1250 }
1251
1252 ////////////////////////////////////////////////////////////////////////////
1253 /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
1254 /// \deprecated Use other overloads that do not require template arguments.
1255 /// \tparam ColumnTypes variadic list of branch/column types.
1256 /// \param[in] treename The name of the output TTree or RNTuple.
1257 /// \param[in] filename The name of the output TFile.
1258 /// \param[in] columnList The list of names of the columns/branches/fields to be written.
1259 /// \param[in] options RSnapshotOptions struct with extra options to pass to the output TFile and TTree/RNTuple.
1260 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1261 ///
1262 template <typename... ColumnTypes>
1264 6, 40, "Snapshot does not need template arguments anymore, you can safely remove them from this function call.")
1265 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1268 {
    // The ColumnTypes pack is not used in the body: the call is forwarded to Snapshot without template arguments.
    // NOTE(review): the macro name on the deprecation line and the `columnList`/`options` parameter
    // declarations are not visible in this listing.
1269 return Snapshot(treename, filename, columnList, options);
1270 }
1271
1272 ////////////////////////////////////////////////////////////////////////////
1273 /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
1274 /// \param[in] treename The name of the output TTree or RNTuple.
1275 /// \param[in] filename The name of the output TFile.
1276 /// \param[in] columnList The list of names of the columns/branches/fields to be written.
1277 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple.
1278 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1279 ///
1280 /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
1281 /// The types of the columns are automatically inferred and do not need to be specified.
1282 ///
1283 /// Support for writing of nested branches/fields is limited (although RDataFrame is able to read them) and dot ('.')
1284 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
1285 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1286 /// written out and it appears before the array in the columnList.
1287 ///
1288 /// By default, in case of TTree, TChain or RNTuple inputs, Snapshot will try to write out all top-level branches.
1289 /// For other types of inputs, all columns returned by GetColumnNames() will be written out. Systematic variations of
1290 /// columns will be included if the corresponding flag is set in RSnapshotOptions. See \ref snapshot-with-variations
1291 /// "Snapshot with Variations" for more details. If friend trees or chains are present, by default all friend
1292 /// top-level branches that have names that do not collide with names of branches in the main TTree/TChain will be
1293 /// written out. Since v6.24, Snapshot will also write out friend branches with the same names of branches in the
1294 /// main TTree/TChain with names of the form
1295 /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
1296 ///
1297 /// ### Writing to a sub-directory
1298 ///
1299 /// Snapshot supports writing the TTree or RNTuple in a sub-directory inside the TFile. It is sufficient to specify
1300 /// the directory path as part of the TTree or RNTuple name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree
1301 /// `t` in the sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1302 ///
1303 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1304 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled
1305 /// with respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in
1306 /// wrong associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1307 /// error out if such a "shuffled" TTree is used in a friendship.
1308 ///
1309 /// \note In case no events are written out (e.g. because no event passes all filters), Snapshot will still write the
1310 /// requested output TTree or RNTuple to the file, with all the branches requested to preserve the dataset schema.
1311 ///
1312 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1313 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1314 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1315 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1316 ///
1317 /// ### Example invocations:
1318 ///
1319 /// ~~~{.cpp}
1320 /// // No need to specify column types, they are automatically deduced thanks
1321 /// // to information coming from the data source
1322 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1323 /// ~~~
1324 ///
1325 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1326 /// `RSnapshotOptions`:
1327 /// ~~~{.cpp}
1328 /// RSnapshotOptions opts;
1329 /// opts.fLazy = true;
1330 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1331 /// ~~~
1332 ///
1333 /// To snapshot to the RNTuple data format, the `fOutputFormat` option in `RSnapshotOptions` needs to be set
1334 /// accordingly:
1335 /// ~~~{.cpp}
1336 /// RSnapshotOptions opts;
1337 /// opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
1338 /// df.Snapshot("outputNTuple", "outputFile.root", {"x"}, opts);
1339 /// ~~~
1340 ///
1341 /// Snapshot systematic variations resulting from a Vary() call (see details \ref snapshot-with-variations "here"):
1342 /// ~~~{.cpp}
1343 /// RSnapshotOptions opts;
1344 /// opts.fIncludeVariations = true;
1345 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1346 /// ~~~
1349 const RSnapshotOptions &options = RSnapshotOptions())
1350 {
1351 // TODO: Remove before releasing 6.40.00
1352#if ROOT_VERSION_CODE >= ROOT_VERSION(6, 40, 0)
1353 static_assert(false && "Remove information about change of Snapshot defaut compression settings.");
1354#endif
1355 [[maybe_unused]] static bool once = []() {
1356 if (const char *suppress = std::getenv("ROOT_RDF_SNAPSHOT_INFO"))
1357 if (std::strcmp(suppress, "0") == 0)
1358 return true;
1359 if (const char *suppress = gEnv->GetValue("ROOT.RDF.Snapshot.Info", "1"))
1360 if (std::strcmp(suppress, "0") == 0)
1361 return true;
1364 << "\n\tIn ROOT 6.38.00, the default compression settings of Snapshot were changed from 101 (ZLIB with "
1365 "compression level 1, the TTree default) to 505 (ZSTD with compression level 5). The decision was based "
1366 "on empirical evidence available up to that point. New studies summarised at "
1367 "https://github.com/root-project/root/pull/21753 show that in certain cases "
1368 "compression setting 101 is still the best option for TTree. Thus, this choice is reverted in ROOT "
1369 "6.38.06 and later releases. "
1370 "In order to suppress this message, set 'ROOT_RDF_SNAPSHOT_INFO=0' in your environment or set "
1371 "'ROOT.RDF.Snapshot.Info: 0' in your .rootrc file.";
1372 return true;
1373 }();
1374 // like columnList but with `#var` columns removed
1376 // like columnListWithoutSizeColumns but with aliases resolved
1379 // like validCols but with missing size branches required by array branches added in the right positions
1380 const auto pairOfColumnLists =
1384
1385 const auto fullTreeName = treename;
1387 treename = parsedTreePath.fTreeName;
1388 const auto &dirname = parsedTreePath.fDirName;
1389
1391
1393
1394 auto retrieveTypeID = [](const std::string &colName, const std::string &colTypeName,
1395 bool isRNTuple = false) -> const std::type_info * {
1396 try {
1398 } catch (const std::runtime_error &err) {
1399 if (isRNTuple)
1401
1402 if (std::string(err.what()).find("Cannot extract type_info of type") != std::string::npos) {
1403 // We could not find RTTI for this column, thus we cannot write it out at the moment.
1404 std::string trueTypeName{colTypeName};
1405 if (colTypeName.rfind("CLING_UNKNOWN_TYPE", 0) == 0)
1406 trueTypeName = colTypeName.substr(19);
1407 std::string msg{"No runtime type information is available for column \"" + colName +
1408 "\" with type name \"" + trueTypeName +
1409 "\". Thus, it cannot be written to disk with Snapshot. Make sure to generate and load "
1410 "ROOT dictionaries for the type of this column."};
1411
1412 throw std::runtime_error(msg);
1413 } else {
1414 throw;
1415 }
1416 }
1417 };
1418
1420
1421 if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) {
1422 // The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one
1423 // without a data source for now, and set it once the actual data source can be created (i.e., after
1424 // writing the RNTuple).
1425 auto newRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(colListNoPoundSizes));
1426
1427 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1428 std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches,
1429 options, newRDF->GetLoopManager(), GetLoopManager(), true /* fToNTuple */, /*fIncludeVariations=*/false});
1430
1433
1434 const auto nSlots = fLoopManager->GetNSlots();
1435 std::vector<const std::type_info *> colTypeIDs;
1436 colTypeIDs.reserve(nColumns);
1437 for (decltype(nColumns) i{}; i < nColumns; i++) {
1438 const auto &colName = validColumnNames[i];
1440 colName, /*tree*/ nullptr, GetDataSource(), fColRegister.GetDefine(colName), options.fVector2RVec);
1441 const std::type_info *colTypeID = retrieveTypeID(colName, colTypeName, /*isRNTuple*/ true);
1442 colTypeIDs.push_back(colTypeID);
1443 }
1444 // Crucial e.g. if the column names do not correspond to already-available column readers created by the data
1445 // source
1447
1448 auto action =
1450 resPtr = MakeResultPtr(newRDF, *GetLoopManager(), std::move(action));
1451 } else {
1452 if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" &&
1453 options.fOutputFormat == ESnapshotOutputFormat::kDefault) {
1454 Warning("Snapshot",
1455 "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you "
1456 "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in "
1457 "RSnapshotOptions. Note that this current default behaviour might change in the future.");
1458 }
1459
1460 // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset
1461 // has actually been created and written to TFile, i.e. at the end of the Snapshot execution.
1462 auto newRDF = std::make_shared<RInterface<RLoopManager>>(
1463 std::make_shared<RLoopManager>(colListNoAliasesWithSizeBranches));
1464
1465 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1466 std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches,
1467 options, newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */, options.fIncludeVariations});
1468
1471
1472 const auto nSlots = fLoopManager->GetNSlots();
1473 std::vector<const std::type_info *> colTypeIDs;
1474 colTypeIDs.reserve(nColumns);
1475 for (decltype(nColumns) i{}; i < nColumns; i++) {
1476 const auto &colName = validColumnNames[i];
1478 colName, /*tree*/ nullptr, GetDataSource(), fColRegister.GetDefine(colName), options.fVector2RVec);
1479 const std::type_info *colTypeID = retrieveTypeID(colName, colTypeName);
1480 colTypeIDs.push_back(colTypeID);
1481 }
1482 // Crucial e.g. if the column names do not correspond to already-available column readers created by the data
1483 // source
1485
1486 auto action =
1488 resPtr = MakeResultPtr(newRDF, *GetLoopManager(), std::move(action));
1489 }
1490
1491 if (!options.fLazy)
1492 *resPtr;
1493 return resPtr;
1494 }
1495
1496 // clang-format off
1497 ////////////////////////////////////////////////////////////////////////////
1498 /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
1499 /// \param[in] treename The name of the output TTree or RNTuple.
1500 /// \param[in] filename The name of the output TFile.
1501 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' at the beginning and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1502 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple
1503 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1504 ///
1505 /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
1506 /// The types of the columns are automatically inferred and do not need to be specified.
1507 ///
1508 /// See Snapshot(std::string_view, std::string_view, const ColumnNames_t&, const RSnapshotOptions &) for a more complete description and example usages.
1510 std::string_view columnNameRegexp = "",
1511 const RSnapshotOptions &options = RSnapshotOptions())
1512 {
1514
1516 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1518 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1519 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1524
1525 // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1526 // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1528
1530
1531 if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS") {
1533 }
1534
1535 return Snapshot(treename, filename, selectedColumns, options);
1536 }
1537 // clang-format on
1538
1539 // clang-format off
1540 ////////////////////////////////////////////////////////////////////////////
1541 /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
1542 /// \param[in] treename The name of the output TTree or RNTuple.
1543 /// \param[in] filename The name of the output TFile.
1544 /// \param[in] columnList The list of names of the columns/branches to be written.
1545 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple.
1546 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1547 ///
1548 /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
1549 /// The types of the columns are automatically inferred and do not need to be specified.
1550 ///
1551 /// See Snapshot(std::string_view, std::string_view, const ColumnNames_t&, const RSnapshotOptions &) for a more complete description and example usages.
1553 std::initializer_list<std::string> columnList,
1554 const RSnapshotOptions &options = RSnapshotOptions())
1555 {
1557 return Snapshot(treename, filename, selectedColumns, options);
1558 }
1559 // clang-format on
1560
1561 ////////////////////////////////////////////////////////////////////////////
1562 /// \brief Save selected columns in memory.
1563 /// \tparam ColumnTypes variadic list of branch/column types.
1564 /// \param[in] columnList columns to be cached in memory.
1565 /// \return a `RDataFrame` that wraps the cached dataset.
1566 ///
1567 /// This action returns a new `RDataFrame` object, completely detached from
1568 /// the originating `RDataFrame`. The new dataframe only contains the cached
1569 /// columns and stores their content in memory for fast, zero-copy subsequent access.
1570 ///
1571 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1572 /// fits in memory and that will be accessed many times.
1573 ///
1574 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1575 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1576 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1577 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1578 ///
1579 /// ### Example usage:
1580 ///
1581 /// **Types and columns specified:**
1582 /// ~~~{.cpp}
1583 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1584 /// ~~~
1585 ///
1586 /// **Types inferred and columns specified (this invocation relies on jitting):**
1587 /// ~~~{.cpp}
1588 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1589 /// ~~~
1590 ///
1591 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1592 /// ~~~{.cpp}
1593 /// auto cache_all_cols_df = df.Cache(myRegexp);
1594 /// ~~~
1595 template <typename... ColumnTypes>
1597 {
1598 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1600 }
1601
1602 ////////////////////////////////////////////////////////////////////////////
1603 /// \brief Save selected columns in memory.
1604 /// \param[in] columnList columns to be cached in memory
1605 /// \return a `RDataFrame` that wraps the cached dataset.
1606 ///
1607 /// See the previous overloads for more information.
1609 {
1610 // Early return: if the list of columns is empty, just return an empty RDF
1611 // If we proceed, the jitted call will not compile!
1612 if (columnList.empty()) {
1613 auto nEntries = *this->Count();
1614 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1615 return emptyRDF;
1616 }
1617
1618 std::stringstream cacheCall;
1620 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1621 fColRegister);
1622 // build a string equivalent to
1623 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
1624 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1625 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1627 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1629
1631
1632 const auto validColumnNames =
1634 const auto colTypes =
1635 GetValidatedArgTypes(validColumnNames, fColRegister, nullptr, GetDataSource(), "Cache", /*vector2RVec=*/false);
1636 for (const auto &colType : colTypes)
1637 cacheCall << colType << ", ";
1638 if (!columnListWithoutSizeColumns.empty())
1639 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
1640 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1642
1643 // book the code to jit with the RLoopManager and trigger the event loop
1644 fLoopManager->ToJitExec(cacheCall.str());
1645 fLoopManager->Jit();
1646
1647 return resRDF;
1648 }
1649
1650 ////////////////////////////////////////////////////////////////////////////
1651 /// \brief Save selected columns in memory.
1652 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' at the beginning and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1653 /// \return a `RDataFrame` that wraps the cached dataset.
1654 ///
1655 /// The existing columns are matched against the regular expression. If the string provided
1656 /// is empty, all columns are selected. See the previous overloads for more information.
1658 {
1661 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Cache those
1663 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1664 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1666 columnNames.reserve(definedColumns.size() + dsColumns.size());
1670 return Cache(selectedColumns);
1671 }
1672
1673 ////////////////////////////////////////////////////////////////////////////
1674 /// \brief Save selected columns in memory.
1675 /// \param[in] columnList columns to be cached in memory.
1676 /// \return a `RDataFrame` that wraps the cached dataset.
1677 ///
1678 /// See the previous overloads for more information.
1679 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1680 {
1682 return Cache(selectedColumns);
1683 }
1684
1685 // clang-format off
1686 ////////////////////////////////////////////////////////////////////////////
1687 /// \brief Creates a node that filters entries based on range: [begin, end).
1688 /// \param[in] begin Initial entry number considered for this range.
1689 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1690 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1691 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1692 ///
1693 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1694 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1695 ///
1696 /// ### Example usage:
1697 /// ~~~{.cpp}
1698 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1699 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1700 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1701 /// ~~~
1702 // clang-format on
1703 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1704 {
1705 // check invariants
1706 if (stride == 0 || (end != 0 && end < begin))
1707 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1708 CheckIMTDisabled("Range");
1709
1710 using Range_t = RDFDetail::RRange<Proxied>;
1711 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1713 return newInterface;
1714 }
1715
1716 // clang-format off
1717 ////////////////////////////////////////////////////////////////////////////
1718 /// \brief Creates a node that filters entries based on range.
1719 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1720 /// \return a node of the computation graph for which the range is defined.
1721 ///
1722 /// See the other Range overload for a detailed description.
1723 // clang-format on
1724 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1725
1726 // clang-format off
1727 ////////////////////////////////////////////////////////////////////////////
1728 /// \brief Execute a user-defined function on each entry (*instant action*).
1729 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1730 /// \param[in] columns Names of the columns/branches in input to the user function.
1731 ///
1732 /// The callable `f` is invoked once per entry. This is an *instant action*:
1733 /// upon invocation, an event loop as well as execution of all scheduled actions
1734 /// is triggered.
1735 /// Users are responsible for the thread-safety of this callable when executing
1736 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1737 ///
1738 /// ### Example usage:
1739 /// ~~~{.cpp}
1740 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1741 /// ~~~
1742 // clang-format on
1743 template <typename F>
1744 void Foreach(F f, const ColumnNames_t &columns = {})
1745 {
1746 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1747 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1748 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1749 }
1750
1751 // clang-format off
1752 ////////////////////////////////////////////////////////////////////////////
1753 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1754 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1755 /// \param[in] columns Names of the columns/branches in input to the user function.
1756 ///
1757 /// Same as `Foreach`, but the user-defined function takes an extra
1758 /// `unsigned int` as its first parameter, the *processing slot index*.
1759 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1760 /// for each thread of execution.
1761 /// This is meant as a helper in writing thread-safe `Foreach`
1762 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1763 /// The user-defined processing callable is able to follow different
1764 /// *streams of processing* indexed by the first parameter.
1765 /// `ForeachSlot` works just as well with single-thread execution: in that
1766 /// case `slot` will always be `0`.
1767 ///
1768 /// ### Example usage:
1769 /// ~~~{.cpp}
1770 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1771 /// ~~~
1772 // clang-format on
1773 template <typename F>
1774 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1775 {
1777 constexpr auto nColumns = ColTypes_t::list_size;
1778
1781
1782 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1784
1785 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1786
1787 fLoopManager->Run();
1788 }
1789
1790 // clang-format off
1791 ////////////////////////////////////////////////////////////////////////////
1792 /// \brief Execute a user-defined reduce operation on the values of a column.
1793 /// \tparam F The type of the reduce callable. Automatically deduced.
1794 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1795 /// \param[in] f A callable with signature `T(T,T)`
1796 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1797 /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1798 ///
1799 /// A reduction takes two values of a column and merges them into one (e.g.
1800 /// by summing them, taking the maximum, etc). This action performs the
1801 /// specified reduction operation on all processed column values, returning
1802 /// a single value of the same type. The callable f must satisfy the general
1803 /// requirements of a *processing function* besides having signature `T(T,T)`
1804 /// where `T` is the type of column columnName.
1805 ///
1806 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1807 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1808 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1809 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1810 /// overload.
1811 ///
1812 /// ### Example usage:
1813 /// ~~~{.cpp}
1814 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1815 /// ~~~
1816 ///
1817 /// This action is *lazy*: upon invocation of this method the calculation is
1818 /// booked but not executed. Also see RResultPtr.
1819 // clang-format on
1821 RResultPtr<T> Reduce(F f, std::string_view columnName = "")
1822 {
1823 static_assert(
1824 std::is_default_constructible<T>::value,
1825 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1826 return Reduce(std::move(f), columnName, T());
1827 }
1828
1829 ////////////////////////////////////////////////////////////////////////////
1830 /// \brief Execute a user-defined reduce operation on the values of a column.
1831 /// \tparam F The type of the reduce callable. Automatically deduced.
1832 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1833 /// \param[in] f A callable with signature `T(T,T)`
1834 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1835 /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1836 /// \return the reduced quantity wrapped in a RResultPtr.
1837 ///
1838 /// ### Example usage:
1839 /// ~~~{.cpp}
1840 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1841 /// ~~~
1842 /// See the description of the first Reduce overload for more information.
1844 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1845 {
// Forward to Aggregate, passing the same callable `f` for both of its functor arguments and
// `redIdentity` as the starting value.
// NOTE(review): presumably the two functor arguments are the per-entry accumulation and the merge of
// partial results; Aggregate is not visible from here, so confirm against its declaration.
1846 return Aggregate(f, f, columnName, redIdentity);
1847 }
1848
1849 ////////////////////////////////////////////////////////////////////////////
1850 /// \brief Return the number of entries processed (*lazy action*).
1851 /// \return the number of entries wrapped in a RResultPtr.
1852 ///
1853 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1854 /// This action is *lazy*: upon invocation of this method the calculation is
1855 /// booked but not executed. Also see RResultPtr.
1856 ///
1857 /// ### Example usage:
1858 /// ~~~{.cpp}
1859 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1860 /// ~~~
1861 ///
1863 {
1864 const auto nSlots = fLoopManager->GetNSlots();
1865 auto cSPtr = std::make_shared<ULong64_t>(0);
1866 using Helper_t = RDFInternal::CountHelper;
1868 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1870 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1871 }
1872
1873 ////////////////////////////////////////////////////////////////////////////
1874 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1875 /// \tparam T The type of the column.
1876 /// \tparam COLL The type of collection used to store the values.
1877 /// \param[in] column The name of the column to collect the values of.
1878 /// \return the content of the selected column wrapped in a RResultPtr.
1879 ///
1880 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1881 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1882 /// ### Example usage:
1883 /// ~~~{.cpp}
1884 /// // In this case intCol is a std::vector<int>
1885 /// auto intCol = rdf.Take<int>("integerColumn");
1886 /// // Same content as above but in this case taken as a RVec<int>
1887 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1888 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1889 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1890 /// ~~~
1891 /// This action is *lazy*: upon invocation of this method the calculation is
1892 /// booked but not executed. Also see RResultPtr.
1893 template <typename T, typename COLL = std::vector<T>>
1894 RResultPtr<COLL> Take(std::string_view column = "")
1895 {
1896 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1897
1900
1901 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1903 auto valuesPtr = std::make_shared<COLL>();
1904 const auto nSlots = fLoopManager->GetNSlots();
1905
1906 auto action =
1907 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1908 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1909 }
1910
1911 ////////////////////////////////////////////////////////////////////////////
1912 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1913 /// \tparam V The type of the column used to fill the histogram.
1914 /// \param[in] model The returned histogram will be constructed using this as a model.
1915 /// \param[in] vName The name of the column that will fill the histogram.
1916 /// \return the monodimensional histogram wrapped in a RResultPtr.
1917 ///
1918 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1919 /// is filled with each one of the elements of the container. In case multiple columns of container type
1920 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1921 /// possibly different lengths between events).
1922 /// This action is *lazy*: upon invocation of this method the calculation is
1923 /// booked but not executed. Also see RResultPtr.
1924 ///
1925 /// ### Example usage:
1926 /// ~~~{.cpp}
1927 /// // Deduce column type (this invocation needs jitting internally)
1928 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1929 /// // Explicit column type
1930 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1931 /// ~~~
1932 ///
1933 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1934 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1935 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1936 template <typename V = RDFDetail::RInferredType>
1937 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1938 {
1939 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1940
1942
1943 std::shared_ptr<::TH1D> h(nullptr);
1944 {
1945 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1946 h = model.GetHistogram();
1947 }
1948
1949 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1950 h->SetCanExtend(::TH1::kAllAxes);
1952 }
1953
1954 ////////////////////////////////////////////////////////////////////////////
1955 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1956 /// \tparam V The type of the column used to fill the histogram.
1957 /// \param[in] vName The name of the column that will fill the histogram.
1958 /// \return the monodimensional histogram wrapped in a RResultPtr.
1959 ///
1960 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1961 /// The "name" and "title" strings are built starting from the input column name.
1962 /// See the description of the first Histo1D() overload for more details.
1963 ///
1964 /// ### Example usage:
1965 /// ~~~{.cpp}
1966 /// // Deduce column type (this invocation needs jitting internally)
1967 /// auto myHist1 = myDf.Histo1D("myColumn");
1968 /// // Explicit column type
1969 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1970 /// ~~~
1971 template <typename V = RDFDetail::RInferredType>
1973 {
1974 const auto h_name = std::string(vName);
1975 const auto h_title = h_name + ";" + h_name + ";count";
1976 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1977 }
1978
1979 ////////////////////////////////////////////////////////////////////////////
1980 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1981 /// \tparam V The type of the column used to fill the histogram.
1982 /// \tparam W The type of the column used as weights.
1983 /// \param[in] model The returned histogram will be constructed using this as a model.
1984 /// \param[in] vName The name of the column that will fill the histogram.
1985 /// \param[in] wName The name of the column that will provide the weights.
1986 /// \return the monodimensional histogram wrapped in a RResultPtr.
1987 ///
1988 /// See the description of the first Histo1D() overload for more details.
1989 ///
1990 /// ### Example usage:
1991 /// ~~~{.cpp}
1992 /// // Deduce column type (this invocation needs jitting internally)
1993 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1994 /// // Explicit column type
1995 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1996 /// ~~~
// NOTE(review): listing lines 2001, 2003 and 2012 are not visible here, so the conditional around
// `ColumnNames_t()` and the final return statement are only partially shown.
1997 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1998 RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1999 {
// Column names in the order: value, weight.
2000 const std::vector<std::string_view> columnViews = {vName, wName};
2002 ? ColumnNames_t()
2004 std::shared_ptr<::TH1D> h(nullptr);
2005 {
// The guard (constructed with kError) lives only for this scope, i.e. while the histogram is
// created from the model. Presumably it silences messages below kError -- confirm.
2006 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2007 h = model.GetHistogram();
2008 }
2009
// Identical lower and upper x-axis limits (e.g. the 0., 0. of the default models): let the
// histogram extend all of its axes.
2010 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
2011 h->SetCanExtend(::TH1::kAllAxes);
2013 }
2014
2015 ////////////////////////////////////////////////////////////////////////////
2016 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
2017 /// \tparam V The type of the column used to fill the histogram.
2018 /// \tparam W The type of the column used as weights.
2019 /// \param[in] vName The name of the column that will fill the histogram.
2020 /// \param[in] wName The name of the column that will provide the weights.
2021 /// \return the monodimensional histogram wrapped in a RResultPtr.
2022 ///
2023 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
2024 /// The "name" and "title" strings are built starting from the input column names.
2025 /// See the description of the first Histo1D() overload for more details.
2026 ///
2027 /// ### Example usage:
2028 /// ~~~{.cpp}
2029 /// // Deduce column types (this invocation needs jitting internally)
2030 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
2031 /// // Explicit column types
2032 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
2033 /// ~~~
2034 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2035 RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
2036 {
2037 // We build name and title based on the value and weight column names
2038 std::string str_vName{vName};
2039 std::string str_wName{wName};
2040 const auto h_name = str_vName + "_weighted_" + str_wName;
2041 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
2042 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
2043 }
2044
2045 ////////////////////////////////////////////////////////////////////////////
2046 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
2047 /// \tparam V The type of the column used to fill the histogram.
2048 /// \tparam W The type of the column used as weights.
2049 /// \param[in] model The returned histogram will be constructed using this as a model.
2050 /// \return the monodimensional histogram wrapped in a RResultPtr.
2051 ///
2052 /// This overload will use the first two default columns as column names.
2053 /// See the description of the first Histo1D() overload for more details.
2054 template <typename V, typename W>
2055 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
2056 {
2057 return Histo1D<V, W>(model, "", "");
2058 }
2059
2060 ////////////////////////////////////////////////////////////////////////////
2061 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
2062 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
2063 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
2064 /// \param[in] model The returned histogram will be constructed using this as a model.
2065 /// \param[in] v1Name The name of the column that will fill the x axis.
2066 /// \param[in] v2Name The name of the column that will fill the y axis.
2067 /// \return the bidimensional histogram wrapped in a RResultPtr.
2068 ///
2069 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
2070 /// is filled with each one of the elements of the container. In case multiple columns of container type
2071 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
2072 /// possibly different lengths between events).
2073 /// This action is *lazy*: upon invocation of this method the calculation is
2074 /// booked but not executed. Also see RResultPtr.
2075 ///
2076 /// ### Example usage:
2077 /// ~~~{.cpp}
2078 /// // Deduce column types (this invocation needs jitting internally)
2079 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
2080 /// // Explicit column types
2081 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
2082 /// ~~~
2083 ///
2084 ///
2085 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
2086 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2087 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
// NOTE(review): listing lines 2100, 2102 and 2103 are not visible here, so the conditional around
// `ColumnNames_t()` and the final return statement are only partially shown.
2088 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2089 RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2090 {
2091 std::shared_ptr<::TH2D> h(nullptr);
2092 {
// The guard (constructed with kError) lives only while the histogram is created from the model.
2093 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2094 h = model.GetHistogram();
2095 }
// Models for which HasAxisLimits() reports false are rejected with an exception.
2096 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2097 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2098 }
// Column names in the order: x, y.
2099 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2101 ? ColumnNames_t()
2104 }
2105
2106 ////////////////////////////////////////////////////////////////////////////
2107 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
2108 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
2109 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
2110 /// \tparam W The type of the column used for the weights of the histogram.
2111 /// \param[in] model The returned histogram will be constructed using this as a model.
2112 /// \param[in] v1Name The name of the column that will fill the x axis.
2113 /// \param[in] v2Name The name of the column that will fill the y axis.
2114 /// \param[in] wName The name of the column that will provide the weights.
2115 /// \return the bidimensional histogram wrapped in a RResultPtr.
2116 ///
2117 /// This action is *lazy*: upon invocation of this method the calculation is
2118 /// booked but not executed. Also see RResultPtr.
2119 ///
2120 /// ### Example usage:
2121 /// ~~~{.cpp}
2122 /// // Deduce column types (this invocation needs jitting internally)
2123 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
2124 /// // Explicit column types
2125 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
2126 /// ~~~
2127 ///
2128 /// See the documentation of the first Histo2D() overload for more details.
// NOTE(review): listing lines 2131, 2143, 2145 and 2146 are not visible here (return type, parts of the
// column-name conditional and the final return statement).
2129 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2130 typename W = RDFDetail::RInferredType>
2132 Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2133 {
2134 std::shared_ptr<::TH2D> h(nullptr);
2135 {
// The guard (constructed with kError) lives only while the histogram is created from the model.
2136 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2137 h = model.GetHistogram();
2138 }
// Models for which HasAxisLimits() reports false are rejected with an exception.
2139 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2140 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2141 }
// Column names in the order: x, y, weight.
2142 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2144 ? ColumnNames_t()
2147 }
2148
/// \brief Forwarding overload: calls Histo2D<V1, V2, W>() with `model` and empty names for the
/// x, y and weight columns.
// NOTE(review): the signature line (numbered 2150 in this listing) is not visible here.
2149 template <typename V1, typename V2, typename W>
2151 {
2152 return Histo2D<V1, V2, W>(model, "", "", "");
2153 }
2154
2155 ////////////////////////////////////////////////////////////////////////////
2156 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2157 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2158 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2159 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2160 /// \param[in] model The returned histogram will be constructed using this as a model.
2161 /// \param[in] v1Name The name of the column that will fill the x axis.
2162 /// \param[in] v2Name The name of the column that will fill the y axis.
2163 /// \param[in] v3Name The name of the column that will fill the z axis.
2164 /// \return the tridimensional histogram wrapped in a RResultPtr.
2165 ///
2166 /// This action is *lazy*: upon invocation of this method the calculation is
2167 /// booked but not executed. Also see RResultPtr.
2168 ///
2169 /// ### Example usage:
2170 /// ~~~{.cpp}
2171 /// // Deduce column types (this invocation needs jitting internally)
2172 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2173 /// "myValueX", "myValueY", "myValueZ");
2174 /// // Explicit column types
2175 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2176 /// "myValueX", "myValueY", "myValueZ");
2177 /// ~~~
2178 /// \note If three-dimensional histograms consume too much memory in multithreaded runs, the cloning of TH3D
2179 /// per thread can be reduced using ROOT::RDF::Experimental::ThreadsPerTH3(). See the section "Memory Usage" in
2180 /// the RDataFrame description.
2181 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
2182 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2183 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
// NOTE(review): listing lines 2198, 2200 and 2201 are not visible here, so the conditional around
// `ColumnNames_t()` and the final return statement are only partially shown.
2184 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2185 typename V3 = RDFDetail::RInferredType>
2186 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
2187 std::string_view v3Name = "")
2188 {
2189 std::shared_ptr<::TH3D> h(nullptr);
2190 {
// The guard (constructed with kError) lives only while the histogram is created from the model.
2191 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2192 h = model.GetHistogram();
2193 }
// Models for which HasAxisLimits() reports false are rejected with an exception.
2194 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2195 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2196 }
// Column names in the order: x, y, z.
2197 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2199 ? ColumnNames_t()
2202 }
2203
2204 ////////////////////////////////////////////////////////////////////////////
2205 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2206 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2207 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2208 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2209 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2210 /// \param[in] model The returned histogram will be constructed using this as a model.
2211 /// \param[in] v1Name The name of the column that will fill the x axis.
2212 /// \param[in] v2Name The name of the column that will fill the y axis.
2213 /// \param[in] v3Name The name of the column that will fill the z axis.
2214 /// \param[in] wName The name of the column that will provide the weights.
2215 /// \return the tridimensional histogram wrapped in a RResultPtr.
2216 ///
2217 /// This action is *lazy*: upon invocation of this method the calculation is
2218 /// booked but not executed. Also see RResultPtr.
2219 ///
2220 /// ### Example usage:
2221 /// ~~~{.cpp}
2222 /// // Deduce column types (this invocation needs jitting internally)
2223 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2224 /// "myValueX", "myValueY", "myValueZ", "myWeight");
2225 /// // Explicit column types
2226 /// using d_t = double;
2227 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2228 /// "myValueX", "myValueY", "myValueZ", "myWeight");
2229 /// ~~~
2230 ///
2231 ///
2232 /// See the documentation of the first Histo3D() overload for more details.
// NOTE(review): listing lines 2247, 2249 and 2250 are not visible here, so the conditional around
// `ColumnNames_t()` and the final return statement are only partially shown.
2233 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2234 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2235 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
2236 std::string_view v3Name, std::string_view wName)
2237 {
2238 std::shared_ptr<::TH3D> h(nullptr);
2239 {
// The guard (constructed with kError) lives only while the histogram is created from the model.
2240 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2241 h = model.GetHistogram();
2242 }
// Models for which HasAxisLimits() reports false are rejected with an exception.
2243 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2244 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2245 }
// Column names in the order: x, y, z, weight.
2246 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2248 ? ColumnNames_t()
2251 }
2252
/// \brief Forwarding overload: calls Histo3D<V1, V2, V3, W>() with `model` and empty names for the
/// x, y, z and weight columns.
// NOTE(review): the signature line (numbered 2254 in this listing) is not visible here.
2253 template <typename V1, typename V2, typename V3, typename W>
2255 {
2256 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
2257 }
2258
2259 ////////////////////////////////////////////////////////////////////////////
2260 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2261 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
2262 /// present.
2263 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
2264 /// object.
2265 /// \param[in] model The returned histogram will be constructed using this as a model.
2266 /// \param[in] columnList
2267 /// A list containing the names of the columns that will be passed when calling `Fill`.
2268 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
2269 /// \return the N-dimensional histogram wrapped in a RResultPtr.
2270 ///
2271 /// This action is *lazy*: upon invocation of this method the calculation is
2272 /// booked but not executed. See RResultPtr documentation.
2273 ///
2274 /// ### Example usage:
2275 /// ~~~{.cpp}
2276 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
2277 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2278 /// {"col0", "col1", "col2", "col3"});
2279 /// ~~~
2280 ///
// NOTE(review): the signature line (numbered 2282 in this listing) is not visible here; the body reads
// the arguments `model` and `columnList`.
2281 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
2283 {
2284 std::shared_ptr<::THnD> h(nullptr);
2285 {
// The guard (constructed with kError) covers both the creation of the histogram and the
// column-count check below.
2286 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2287 h = model.GetHistogram();
2288
// One column more than the histogram has dimensions (N+1 columns, i.e. weighted filling):
// call Sumw2() on the histogram. Any count other than N or N+1 is an error.
2289 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2290 h->Sumw2();
2291 } else if (int(columnList.size()) != h->GetNdimensions()) {
2292 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2293 }
2294 }
// Book the action with the explicitly given column types; `h` is passed twice to CreateAction.
2295 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h,
2296 fProxiedPtr);
2297 }
2298
2299 ////////////////////////////////////////////////////////////////////////////
2300 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2301 /// \param[in] model The returned histogram will be constructed using this as a model.
2302 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2303 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
2304 /// \return the N-dimensional histogram wrapped in a RResultPtr.
2305 ///
2306 /// This action is *lazy*: upon invocation of this method the calculation is
2307 /// booked but not executed. Also see RResultPtr.
2308 ///
2309 /// ### Example usage:
2310 /// ~~~{.cpp}
2311 /// auto myFilledObj = myDf.HistoND({"name","title", 4,
2312 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2313 /// {"col0", "col1", "col2", "col3"});
2314 /// ~~~
2315 ///
// NOTE(review): the signature line (2316) and the first line of the return statement (2329) are not
// visible in this listing; the body reads the arguments `model` and `columnList`.
2317 {
2318 std::shared_ptr<::THnD> h(nullptr);
2319 {
// The guard (constructed with kError) covers both the creation of the histogram and the
// column-count check below.
2320 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2321 h = model.GetHistogram();
2322
// One column more than the histogram has dimensions (N+1 columns, i.e. weighted filling):
// call Sumw2() on the histogram. Any count other than N or N+1 is an error.
2323 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2324 h->Sumw2();
2325 } else if (int(columnList.size()) != h->GetNdimensions()) {
2326 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2327 }
2328 }
// Tail of the return statement: the number of columns is its last argument.
2330 columnList.size());
2331 }
2332
2333 ////////////////////////////////////////////////////////////////////////////
2334 /// \brief Fill and return a sparse N-dimensional histogram (*lazy action*).
2335 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
2336 /// present.
2337 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
2338 /// object.
2339 /// \param[in] model The returned histogram will be constructed using this as a model.
2340 /// \param[in] columnList
2341 /// A list containing the names of the columns that will be passed when calling `Fill`.
2342 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
2343 /// \return the N-dimensional histogram wrapped in a RResultPtr.
2344 ///
2345 /// This action is *lazy*: upon invocation of this method the calculation is
2346 /// booked but not executed. See RResultPtr documentation.
2347 ///
2348 /// ### Example usage:
2349 /// ~~~{.cpp}
2350 /// auto myFilledObj = myDf.HistoNSparseD<float, float, float, float>({"name","title", 4,
2351 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2352 /// {"col0", "col1", "col2", "col3"});
2353 /// ~~~
2354 ///
// NOTE(review): the signature line (numbered 2356 in this listing) is not visible here; the body reads
// the arguments `model` and `columnList`.
2355 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
2357 {
2358 std::shared_ptr<::THnSparseD> h(nullptr);
2359 {
// The guard (constructed with kError) covers both the creation of the histogram and the
// column-count check below.
2360 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2361 h = model.GetHistogram();
2362
// One column more than the histogram has dimensions (N+1 columns, i.e. weighted filling):
// call Sumw2() on the histogram. Any count other than N or N+1 is an error.
2363 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2364 h->Sumw2();
2365 } else if (int(columnList.size()) != h->GetNdimensions()) {
2366 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2367 }
2368 }
// Book the action with the explicitly given column types; `h` is passed twice to CreateAction.
2369 return CreateAction<RDFInternal::ActionTags::HistoNSparseD, FirstColumn, OtherColumns...>(columnList, h, h,
2370 fProxiedPtr);
2371 }
2372
2373 ////////////////////////////////////////////////////////////////////////////
2374 /// \brief Fill and return a sparse N-dimensional histogram (*lazy action*).
2375 /// \param[in] model The returned histogram will be constructed using this as a model.
2376 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2377 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
2378 /// \return the N-dimensional histogram wrapped in a RResultPtr.
2379 ///
2380 /// This action is *lazy*: upon invocation of this method the calculation is
2381 /// booked but not executed. Also see RResultPtr.
2382 ///
2383 /// ### Example usage:
2384 /// ~~~{.cpp}
2385 /// auto myFilledObj = myDf.HistoNSparseD({"name","title", 4,
2386 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2387 /// {"col0", "col1", "col2", "col3"});
2388 /// ~~~
2389 ///
// NOTE(review): the signature line (2390) and the first line of the return statement (2403) are not
// visible in this listing; the body reads the arguments `model` and `columnList`.
2391 {
2392 std::shared_ptr<::THnSparseD> h(nullptr);
2393 {
// The guard (constructed with kError) covers both the creation of the histogram and the
// column-count check below.
2394 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2395 h = model.GetHistogram();
2396
// One column more than the histogram has dimensions (N+1 columns, i.e. weighted filling):
// call Sumw2() on the histogram. Any count other than N or N+1 is an error.
2397 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2398 h->Sumw2();
2399 } else if (int(columnList.size()) != h->GetNdimensions()) {
2400 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2401 }
2402 }
// Tail of the return statement: the number of columns is its last argument.
2404 columnList, h, h, fProxiedPtr, columnList.size());
2405 }
2406
2407 ////////////////////////////////////////////////////////////////////////////
2408 /// \brief Fill and return a TGraph object (*lazy action*).
2409 /// \tparam X The type of the column used to fill the x axis.
2410 /// \tparam Y The type of the column used to fill the y axis.
2411 /// \param[in] x The name of the column that will fill the x axis.
2412 /// \param[in] y The name of the column that will fill the y axis.
2413 /// \return the TGraph wrapped in a RResultPtr.
2414 ///
2415 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
2416 /// is filled with each one of the elements of the container.
2417 /// If multithreading is enabled, the order in which points are inserted is undefined.
2418 /// If the graph is going to be drawn, it is recommended to sort it along x before drawing.
2419 /// The TGraph is given a name and a title based on the input column names.
2420 ///
2421 /// This action is *lazy*: upon invocation of this method the calculation is
2422 /// booked but not executed. Also see RResultPtr.
2423 ///
2424 /// ### Example usage:
2425 /// ~~~{.cpp}
2426 /// // Deduce column types (this invocation needs jitting internally)
2427 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
2428 /// // Explicit column types
2429 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
2430 /// ~~~
2431 ///
2432 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
2433 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2434 /// if the returned graph goes out of scope before the end of the program, ROOT might display a blank canvas).
// NOTE(review): listing lines 2440, 2442, 2444 and 2453 are not visible here; among them must be the
// definition of `validatedColumns` used below and the return statement.
2435 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
2436 RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "")
2437 {
2438 auto graph = std::make_shared<::TGraph>();
// Column names in the order: x, y.
2439 const std::vector<std::string_view> columnViews = {x, y};
2441 ? ColumnNames_t()
2443
2445
2446 // Default name "<y>_vs_<x>" and title "<y> vs <x>", built from the validated column names
2447 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2448 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2449 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
// The axis titles are the names of the columns that fill them.
2450 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2451 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2452
2454 }
2455
2456 ////////////////////////////////////////////////////////////////////////////
2457 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
2458 /// \param[in] x The name of the column that will fill the x axis.
2459 /// \param[in] y The name of the column that will fill the y axis.
2460 /// \param[in] exl The name of the column of X low errors
2461 /// \param[in] exh The name of the column of X high errors
2462 /// \param[in] eyl The name of the column of Y low errors
2463 /// \param[in] eyh The name of the column of Y high errors
2464 /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
2465 ///
2466 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
2467 /// is filled with each one of the elements of the container.
2468 /// If Multithreading is enabled, the order in which points are inserted is undefined.
2469 ///
2470 /// This action is *lazy*: upon invocation of this method the calculation is
2471 /// booked but not executed. Also see RResultPtr.
2472 ///
2473 /// ### Example usage:
2474 /// ~~~{.cpp}
2475 /// // Deduce column types (this invocation needs jitting internally)
2476 /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2477 /// // Explicit column types
2478 /// using f = float;
2479 /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2480 /// ~~~
2481 ///
2482 /// `GraphAsymmErrors` should also be used when only the values on one of the axes have
2483 /// associated errors, for example when only the `ey` errors exist and the `ex` errors are all zero.
2484 /// In such cases, the user should do the following:
2485 /// ~~~{.cpp}
2486 /// // Create a column of zeros in RDataFrame
2487 /// auto rdf_withzeros = rdf.Define("zero", "0");
2488 /// // or alternatively:
2489 /// auto rdf_withzeros = rdf.Define("zero", []() -> double { return 0.;});
2490 /// // Create the graph with y errors only
2491 /// auto rdf_errorsOnYOnly = rdf_withzeros.GraphAsymmErrors("xValues", "yValues", "zero", "zero", "eyl", "eyh");
2492 /// ~~~
2493 ///
2494 /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
2495 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2496 /// if the returned graph goes out of scope before the end of the program, ROOT might display a blank canvas).
// NOTE(review): listing lines 2498-2500, 2506, 2508, 2510 and 2519 are not visible here (rest of the
// template parameter list, return type, definition of `validatedColumns`, start of the return statement).
2497 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
2501 GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "",
2502 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
2503 {
2504 auto graph = std::make_shared<::TGraphAsymmErrors>();
// Column names in the order: x, y, x low error, x high error, y low error, y high error.
2505 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
2507 ? ColumnNames_t()
2509
2511
2512 // Default name "<y>_vs_<x>" and title "<y> vs <x>", built from the validated column names
2513 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2514 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2515 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
// The axis titles are the names of the x and y columns.
2516 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2517 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2518
2520 graph, fProxiedPtr);
2521 }
2522
2523 ////////////////////////////////////////////////////////////////////////////
2524 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2525 /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present.
2526 /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present.
2527 /// \param[in] model The model to be considered to build the new return value.
2528 /// \param[in] v1Name The name of the column that will fill the x axis.
2529 /// \param[in] v2Name The name of the column that will fill the y axis.
2530 /// \return the monodimensional profile wrapped in a RResultPtr.
2531 ///
2532 /// This action is *lazy*: upon invocation of this method the calculation is
2533 /// booked but not executed. Also see RResultPtr.
2534 ///
2535 /// ### Example usage:
2536 /// ~~~{.cpp}
2537 /// // Deduce column types (this invocation needs jitting internally)
2538 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2539 /// // Explicit column types
2540 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2541 /// ~~~
2542 ///
2543 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2544 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2545 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
// NOTE(review): listing lines 2547, 2560, 2562 and 2563 are not visible here (return type, parts of the
// column-name conditional and the final return statement).
2546 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2548 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2549 {
2550 std::shared_ptr<::TProfile> h(nullptr);
2551 {
// The guard (constructed with kError) lives only while the profile is created from the model.
2552 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2553 h = model.GetProfile();
2554 }
2555
// Models for which HasAxisLimits() reports false are rejected with an exception.
2556 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2557 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2558 }
// Column names in the order: x, y.
2559 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2561 ? ColumnNames_t()
2564 }
2565
2566 ////////////////////////////////////////////////////////////////////////////
2567 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2568 /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present.
2569 /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present.
2570 /// \tparam W The type of the column that provides the weights used to fill the profile. Inferred if not present.
2571 /// \param[in] model The model to be considered to build the new return value.
2572 /// \param[in] v1Name The name of the column that will fill the x axis.
2573 /// \param[in] v2Name The name of the column that will fill the y axis.
2574 /// \param[in] wName The name of the column that will provide the weights.
2575 /// \return the monodimensional profile wrapped in a RResultPtr.
2576 ///
2577 /// This action is *lazy*: upon invocation of this method the calculation is
2578 /// booked but not executed. Also see RResultPtr.
2579 ///
2580 /// ### Example usage:
2581 /// ~~~{.cpp}
2582 /// // Deduce column types (this invocation needs jitting internally)
2583 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2584 /// // Explicit column types
2585 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2586 /// "xValues", "yValues", "weight");
2587 /// ~~~
2588 ///
2589 /// See the first Profile1D() overload for more details.
// NOTE(review): listing lines 2592, 2605, 2607 and 2608 are not visible here (return type, parts of the
// column-name conditional and the final return statement).
2590 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2591 typename W = RDFDetail::RInferredType>
2593 Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2594 {
2595 std::shared_ptr<::TProfile> h(nullptr);
2596 {
// The guard (constructed with kError) lives only while the profile is created from the model.
2597 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2598 h = model.GetProfile();
2599 }
2600
// Models for which HasAxisLimits() reports false are rejected with an exception.
2601 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2602 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2603 }
// Column names in the order: x, y, weight.
2604 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2606 ? ColumnNames_t()
2609 }
2610
2611 ////////////////////////////////////////////////////////////////////////////
2612 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2613 /// See the first Profile1D() overload for more details.
// Forwarding overload: calls Profile1D<V1, V2, W>() with `model` and empty names for the x, y and
// weight columns.
// NOTE(review): the signature line (numbered 2615 in this listing) is not visible here.
2614 template <typename V1, typename V2, typename W>
2616 {
2617 return Profile1D<V1, V2, W>(model, "", "", "");
2618 }
2619
2620 ////////////////////////////////////////////////////////////////////////////
2621 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2622 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2623 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2624 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2625 /// \param[in] model The returned profile will be constructed using this as a model.
2626 /// \param[in] v1Name The name of the column that will fill the x axis.
2627 /// \param[in] v2Name The name of the column that will fill the y axis.
2628 /// \param[in] v3Name The name of the column that will fill the z axis.
2629 /// \return the bidimensional profile wrapped in a RResultPtr.
2630 ///
2631 /// This action is *lazy*: upon invocation of this method the calculation is
2632 /// booked but not executed. Also see RResultPtr.
2633 ///
2634 /// ### Example usage:
2635 /// ~~~{.cpp}
2636 /// // Deduce column types (this invocation needs jitting internally)
2637 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2638 /// "xValues", "yValues", "zValues");
2639 /// // Explicit column types
2640 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2641 /// "xValues", "yValues", "zValues");
2642 /// ~~~
2643 ///
2644 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2645 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2646 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2647 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2648 typename V3 = RDFDetail::RInferredType>
2649 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
2650 std::string_view v2Name = "", std::string_view v3Name = "")
2651 {
2652 std::shared_ptr<::TProfile2D> h(nullptr); // result object, assigned from the model just below
2653 { // inner scope: iel only lives for the duration of the GetProfile() call
2654 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes diagnostics up to kError while the profile is built -- confirm in RIgnoreErrorLevelRAII
2655 h = model.GetProfile();
2656 }
2657
2658 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { // refuse profiles that report no axis limits
2659 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2660 }
2661 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name}; // the three caller-supplied names, in argument order
2663 ? ColumnNames_t() // NOTE(review): listing lines 2662 and 2664-2665 (the rest of this statement and the return) are absent from this extract
2666 }
2667
2668 ////////////////////////////////////////////////////////////////////////////
2669 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2670 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2671 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2672 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2673 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2674 /// \param[in] model The returned profile will be constructed using this as a model.
2675 /// \param[in] v1Name The name of the column that will fill the x axis.
2676 /// \param[in] v2Name The name of the column that will fill the y axis.
2677 /// \param[in] v3Name The name of the column that will fill the z axis.
2678 /// \param[in] wName The name of the column that will provide the weights.
2679 /// \return the bidimensional profile wrapped in a RResultPtr.
2680 ///
2681 /// This action is *lazy*: upon invocation of this method the calculation is
2682 /// booked but not executed. Also see RResultPtr.
2683 ///
2684 /// ### Example usage:
2685 /// ~~~{.cpp}
2686 /// // Deduce column types (this invocation needs jitting internally)
2687 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2688 /// "xValues", "yValues", "zValues", "weight");
2689 /// // Explicit column types
2690 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2691 /// "xValues", "yValues", "zValues", "weight");
2692 /// ~~~
2693 ///
2694 /// See the first Profile2D() overload for more details.
2695 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2696 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2697 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
2698 std::string_view v3Name, std::string_view wName)
2699 {
2700 std::shared_ptr<::TProfile2D> h(nullptr); // result object, assigned from the model just below
2701 { // inner scope: iel only lives for the duration of the GetProfile() call
2702 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); // presumably mutes diagnostics up to kError while the profile is built -- confirm in RIgnoreErrorLevelRAII
2703 h = model.GetProfile();
2704 }
2705
2706 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { // refuse profiles that report no axis limits
2707 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2708 }
2709 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; // the four caller-supplied names, weight last
2711 ? ColumnNames_t() // NOTE(review): listing lines 2710 and 2712-2713 (the rest of this statement and the return) are absent from this extract
2714 }
2715
2716 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2717 /// See the first Profile2D() overload for more details.
2718 template <typename V1, typename V2, typename V3, typename W>
2720 { // NOTE(review): the signature (listing line 2719) is absent from this extract
2721 return Profile2D<V1, V2, V3, W>(model, "", "", "", ""); // forward to the weighted overload with all four column names left empty
2722 }
2723
2724 ////////////////////////////////////////////////////////////////////////////
2725 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2726 ///
2727 /// Type T must provide at least:
2728 /// - a copy-constructor
2729 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2730 /// (these types can also be passed as template parameters to this method)
2731 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2732 /// objects passed as argument into the object on which `Merge` was called (analogous to TH1::Merge). Note that
2733 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2734 /// the TCollection*).
2735 ///
2736 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2737 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2738 /// \tparam T The type of the object to fill. Automatically deduced.
2739 /// \param[in] model The model to be considered to build the new return value.
2740 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2741 /// \return the filled object wrapped in a RResultPtr.
2742 ///
2743 /// The user gives up ownership of the model object.
2744 /// The list of column names to be used for filling must always be specified.
2745 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2746 /// Also see RResultPtr.
2747 ///
2748 /// ### Example usage:
2749 /// ~~~{.cpp}
2750 /// MyClass obj;
2751 /// // Deduce column types (this invocation needs jitting internally, and in this case
2752 /// // MyClass needs to be known to the interpreter)
2753 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2754 /// // explicit column types
2755 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2756 /// ~~~
2757 ///
2758 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2760 { // NOTE(review): the signature (listing line 2759) is absent from this extract
2761 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model)); // copy or move the forwarded model into a shared object of the decayed type
2762 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) { // same axis-limits guard used by the profile actions
2763 throw std::runtime_error("The absence of axes limits is not supported yet.");
2764 }
2765 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr, // h is handed over twice, followed by the number of requested columns
2766 columnList.size());
2767 }
2768
2769 ////////////////////////////////////////////////////////////////////////////
2770 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2771 ///
2772 /// \tparam V The type of the value column
2773 /// \param[in] value The name of the column with the values to fill the statistics with.
2774 /// \return the filled TStatistic object wrapped in a RResultPtr.
2775 ///
2776 /// ### Example usage:
2777 /// ~~~{.cpp}
2778 /// // Deduce column type (this invocation needs jitting internally)
2779 /// auto stats0 = myDf.Stats("values");
2780 /// // Explicit column type
2781 /// auto stats1 = myDf.Stats<float>("values");
2782 /// ~~~
2783 ///
2784 template <typename V = RDFDetail::RInferredType>
2785 RResultPtr<TStatistic> Stats(std::string_view value = "")
2786 {
2788 if (!value.empty()) { // only record a column when the caller actually named one (columns is declared on a line absent from this extract)
2789 columns.emplace_back(std::string(value));
2790 }
2792 if (std::is_same<V, RDFDetail::RInferredType>::value) { // V left at its default: call Fill without explicit column types
2793 return Fill(TStatistic(), validColumnNames); // NOTE(review): validColumnNames is declared on listing line 2791, absent from this extract
2794 } else { // NOTE(review): the explicit-type return (listing line 2795) is absent from this extract
2796 }
2797 }
2798
2799 ////////////////////////////////////////////////////////////////////////////
2800 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2801 ///
2802 /// \tparam V The type of the value column
2803 /// \tparam W The type of the weight column
2804 /// \param[in] value The name of the column with the values to fill the statistics with.
2805 /// \param[in] weight The name of the column with the weights to fill the statistics with.
2806 /// \return the filled TStatistic object wrapped in a RResultPtr.
2807 ///
2808 /// ### Example usage:
2809 /// ~~~{.cpp}
2810 /// // Deduce column types (this invocation needs jitting internally)
2811 /// auto stats0 = myDf.Stats("values", "weights");
2812 /// // Explicit column types
2813 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2814 /// ~~~
2815 ///
2816 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2817 RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
2818 {
2819 ColumnNames_t columns{std::string(value), std::string(weight)}; // value column first, weight column second
2820 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2821 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2823 // We have 3 cases:
2824 // 1. Both types are inferred: we use Fill and let the jit kick in.
2825 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2826 // 3. Both types are explicit: we invoke the fully compiled Fill method.
2827 if (vIsInferred && wIsInferred) {
2828 return Fill(TStatistic(), validColumnNames); // NOTE(review): validColumnNames is declared on listing line 2822, absent from this extract
2829 } else if (vIsInferred != wIsInferred) { // exactly one of V and W was given explicitly: throws std::runtime_error
2830 std::string error("The ");
2831 error += vIsInferred ? "weight " : "value "; // name the column whose type WAS given: if V is inferred, the explicit one is the weight
2832 error += "column type is explicit, while the ";
2833 error += vIsInferred ? "value " : "weight "; // ...and then the column whose type was left to inference
2834 error += "is specified to be inferred. This case is not supported: please specify both types or none.";
2835 throw std::runtime_error(error);
2836 } else { // NOTE(review): the fully-typed return (listing line 2837) is absent from this extract
2838 }
2839 }
2840
2841 ////////////////////////////////////////////////////////////////////////////
2842 /// \brief Return the minimum of processed column values (*lazy action*).
2843 /// \tparam T The type of the branch/column.
2844 /// \param[in] columnName The name of the branch/column to be treated.
2845 /// \return the minimum value of the selected column wrapped in a RResultPtr.
2846 ///
2847 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2848 /// template specialization of this method.
2849 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2850 ///
2851 /// This action is *lazy*: upon invocation of this method the calculation is
2852 /// booked but not executed. Also see RResultPtr.
2853 ///
2854 /// ### Example usage:
2855 /// ~~~{.cpp}
2856 /// // Deduce column type (this invocation needs jitting internally)
2857 /// auto minVal0 = myDf.Min("values");
2858 /// // Explicit column type
2859 /// auto minVal1 = myDf.Min<double>("values");
2860 /// ~~~
2861 ///
2862 template <typename T = RDFDetail::RInferredType>
2864 { // NOTE(review): the signature (listing line 2863) is absent from this extract
2865 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty list, otherwise a one-element list
2866 using RetType_t = RDFDetail::MinReturnType_t<T>;
2867 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max()); // result slot seeded with the largest representable RetType_t
2869 } // NOTE(review): the return statement (listing line 2868) is absent from this extract
2870
2871 ////////////////////////////////////////////////////////////////////////////
2872 /// \brief Return the maximum of processed column values (*lazy action*).
2873 /// \tparam T The type of the branch/column.
2874 /// \param[in] columnName The name of the branch/column to be treated.
2875 /// \return the maximum value of the selected column wrapped in a RResultPtr.
2876 ///
2877 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2878 /// template specialization of this method.
2879 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2880 ///
2881 /// This action is *lazy*: upon invocation of this method the calculation is
2882 /// booked but not executed. Also see RResultPtr.
2883 ///
2884 /// ### Example usage:
2885 /// ~~~{.cpp}
2886 /// // Deduce column type (this invocation needs jitting internally)
2887 /// auto maxVal0 = myDf.Max("values");
2888 /// // Explicit column type
2889 /// auto maxVal1 = myDf.Max<double>("values");
2890 /// ~~~
2891 ///
2892 template <typename T = RDFDetail::RInferredType>
2894 { // NOTE(review): the signature (listing line 2893) is absent from this extract
2895 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty list, otherwise a one-element list
2896 using RetType_t = RDFDetail::MaxReturnType_t<T>;
2897 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest()); // result slot seeded with lowest(), the most negative finite RetType_t (not min())
2899 } // NOTE(review): the return statement (listing line 2898) is absent from this extract
2900
2901 ////////////////////////////////////////////////////////////////////////////
2902 /// \brief Return the mean of processed column values (*lazy action*).
2903 /// \tparam T The type of the branch/column.
2904 /// \param[in] columnName The name of the branch/column to be treated.
2905 /// \return the mean value of the selected column wrapped in a RResultPtr.
2906 ///
2907 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2908 /// template specialization of this method.
2909 /// Note that internally, the summations are executed with Kahan sums in double precision, irrespective
2910 /// of the type of column that is read.
2911 ///
2912 /// This action is *lazy*: upon invocation of this method the calculation is
2913 /// booked but not executed. Also see RResultPtr.
2914 ///
2915 /// ### Example usage:
2916 /// ~~~{.cpp}
2917 /// // Deduce column type (this invocation needs jitting internally)
2918 /// auto meanVal0 = myDf.Mean("values");
2919 /// // Explicit column type
2920 /// auto meanVal1 = myDf.Mean<double>("values");
2921 /// ~~~
2922 ///
2923 template <typename T = RDFDetail::RInferredType>
2924 RResultPtr<double> Mean(std::string_view columnName = "")
2925 {
2926 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty list, otherwise a one-element list
2927 auto meanV = std::make_shared<double>(0); // shared result slot, always a double whatever T is
2929 } // NOTE(review): the return statement (listing line 2928) is absent from this extract
2930
2931 ////////////////////////////////////////////////////////////////////////////
2932 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2933 /// \tparam T The type of the branch/column.
2934 /// \param[in] columnName The name of the branch/column to be treated.
2935 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2936 ///
2937 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2938 /// template specialization of this method.
2939 ///
2940 /// This action is *lazy*: upon invocation of this method the calculation is
2941 /// booked but not executed. Also see RResultPtr.
2942 ///
2943 /// ### Example usage:
2944 /// ~~~{.cpp}
2945 /// // Deduce column type (this invocation needs jitting internally)
2946 /// auto stdDev0 = myDf.StdDev("values");
2947 /// // Explicit column type
2948 /// auto stdDev1 = myDf.StdDev<double>("values");
2949 /// ~~~
2950 ///
2951 template <typename T = RDFDetail::RInferredType>
2952 RResultPtr<double> StdDev(std::string_view columnName = "")
2953 {
2954 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty list, otherwise a one-element list
2955 auto stdDeviationV = std::make_shared<double>(0); // shared result slot, always a double whatever T is
2957 } // NOTE(review): the return statement (listing line 2956) is absent from this extract
2958
2959 // clang-format off
2960 ////////////////////////////////////////////////////////////////////////////
2961 /// \brief Return the sum of processed column values (*lazy action*).
2962 /// \tparam T The type of the branch/column.
2963 /// \param[in] columnName The name of the branch/column.
2964 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2965 /// \return the sum of the selected column wrapped in a RResultPtr.
2966 ///
2967 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2968 /// template specialization of this method.
2969 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2970 ///
2971 /// This action is *lazy*: upon invocation of this method the calculation is
2972 /// booked but not executed. Also see RResultPtr.
2973 ///
2974 /// ### Example usage:
2975 /// ~~~{.cpp}
2976 /// // Deduce column type (this invocation needs jitting internally)
2977 /// auto sum0 = myDf.Sum("values");
2978 /// // Explicit column type
2979 /// auto sum1 = myDf.Sum<double>("values");
2980 /// ~~~
2981 ///
2982 template <typename T = RDFDetail::RInferredType>
2984 Sum(std::string_view columnName = "", // NOTE(review): the return type (listing line 2983) is absent from this extract
2985 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{}) // default start value is a value-initialised SumReturnType_t<T>
2986 {
2987 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty list, otherwise a one-element list
2988 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue); // shared result slot starts as a copy of initValue
2990 } // NOTE(review): the return statement (listing line 2989) is absent from this extract
2991 // clang-format on
2992
2993 ////////////////////////////////////////////////////////////////////////////
2994 /// \brief Gather filtering statistics.
2995 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2996 ///
2997 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2998 /// all named filters in the call graph. Calling this method on a
2999 /// stored chain state (i.e. a graph node different from the first) gathers
3000 /// the stats for all named filters in the chain section between the original
3001 /// `RDataFrame` and that node (included). Stats are gathered in the same
3002 /// order as the named filters have been added to the graph.
3003 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
3004 /// effects cuts had.
3005 ///
3006 /// This action is *lazy*: upon invocation of
3007 /// this method the calculation is booked but not executed. See RResultPtr
3008 /// documentation.
3009 ///
3010 /// ### Example usage:
3011 /// ~~~{.cpp}
3012 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
3013 /// auto cutReport = filtered.Report();
3014 /// cutReport->Print();
3015 /// ~~~
3016 ///
3018 { // NOTE(review): the signature (listing line 3017) is absent from this extract
3019 bool returnEmptyReport = false;
3020 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
3021 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
3022 // certainly does not contain named filters.
3023 // The number 4 takes into account the implicit columns for entry and slot number
3024 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
3025 if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4)
3026 returnEmptyReport = true;
3027
3028 auto rep = std::make_shared<RCutFlowReport>(); // the report object that the returned result pointer refers to
3029 using Helper_t = RDFInternal::ReportHelper<Proxied>; // NOTE(review): the Action_t alias (listing line 3030) is absent from this extract
3031
3032 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}), // the helper gets the report, the raw proxied node and the empty-report flag; no input columns are requested
3034
3035 return MakeResultPtr(rep, *fLoopManager, std::move(action)); // ownership of the action is moved into MakeResultPtr
3036 }
3037
3038 /// \brief Returns the names of the filters created.
3039 /// \return the container of filters names.
3040 ///
3041 /// If called on a root node, all the filters in the computation graph will
3042 /// be printed. For any other node, only the filters upstream of that node.
3043 /// Filters without a name are printed as "Unnamed Filter"
3044 /// This is not an action nor a transformation, just a query to the RDataFrame object.
3045 ///
3046 /// ### Example usage:
3047 /// ~~~{.cpp}
3048 /// auto filtNames = d.GetFilterNames();
3049 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
3050 /// ~~~
3051 ///
3052 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); } // thin wrapper: delegates to RDFInternal::GetFilterNames, passing fProxiedPtr
3053
3054 // clang-format off
3055 ////////////////////////////////////////////////////////////////////////////
3056 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
3056 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
3058 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
3059 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
3060 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
3061 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
3062 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
3063 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
3064 /// \return the result of the aggregation wrapped in a RResultPtr.
3065 ///
3066 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
3067 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
3068 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
3069 /// the value of the column columnName.
3070 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
3071 /// Otherwise the signature of aggregator must be `void(U&,T)`.
3072 ///
3073 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
3074 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
3075 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
3076 ///
3077 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
3078 ///
3079 /// Example usage:
3080 /// ~~~{.cpp}
3081 /// auto aggregator = [](double acc, double x) { return acc * x; };
3082 /// ROOT::EnableImplicitMT();
3083 /// // If multithread is enabled, the aggregator function will be called by more threads
3084 /// // and will produce a vector of partial accumulators.
3085 /// // The merger function performs the final aggregation of these partial results.
3086 /// auto merger = [](std::vector<double> &accumulators) {
3087 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
3088 /// accumulators[0] *= accumulators[i];
3089 /// }
3090 /// };
3091 ///
3092 /// // The accumulator is initialized at this value by every thread.
3093 /// double initValue = 1.;
3094 ///
3095 /// // Multiplies all elements of the column "x"
3096 /// auto result = d.Aggregate(aggregator, merger, "x", initValue);
3097 /// ~~~
3098 // clang-format on
3100 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, // NOTE(review): the opening template line (listing line 3099) is absent from this extract
3101 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
3102 typename U = TTraits::TakeFirstParameter_t<ArgTypes>, // U: first parameter type of the aggregator callable
3103 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> // T: second parameter type of the aggregator callable
3105 { // NOTE(review): the signature (listing line 3104) is absent from this extract
3106 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay()); // check performed on the non-decayed argument types; see CheckAggregate for what it enforces
3107 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); // empty name -> empty list, otherwise a one-element list
3108
3111
3112 auto accObjPtr = std::make_shared<U>(aggIdentity); // shared result slot starts as a copy of aggIdentity
3113 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>; // NOTE(review): the Action_t alias (listing line 3114) is absent from this extract
3115 auto action = std::make_unique<Action_t>(
3116 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames, // both callables are moved into the helper, which is also given the slot count
3118 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action)); // ownership of the action is moved into MakeResultPtr
3119 }
3120
3121 // clang-format off
3122 ////////////////////////////////////////////////////////////////////////////
3123 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
3124 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
3125 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
3126 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
3127 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
3128 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
3129 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
3130 /// \return the result of the aggregation wrapped in a RResultPtr.
3131 ///
3132 /// See previous Aggregate overload for more information.
3133 // clang-format on
3135 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, // NOTE(review): the opening template line (listing line 3134) is absent from this extract
3136 typename U = TTraits::TakeFirstParameter_t<ArgTypes>, // U: first parameter type of the aggregator callable
3137 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> // T: second parameter type of the aggregator callable
3139 { // NOTE(review): the signature (listing line 3138) is absent from this extract
3140 static_assert( // without an explicit identity, U() is the start value, so U must be default-constructible
3141 std::is_default_constructible<U>::value,
3142 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
3143 return Aggregate(std::move(aggregator), std::move(merger), columnName, U()); // forward to the four-argument overload with a default-constructed identity
3144 }
3145
3146 // clang-format off
3147 ////////////////////////////////////////////////////////////////////////////
3148 /// \brief Book execution of a custom action using a user-defined helper object.
3149 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
3150 /// \tparam OtherColumns A list of the types of the other columns used by this action
3151 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
3152 /// \param[in] helper The Action Helper to be scheduled.
3153 /// \param[in] columns The names of the columns on which the helper acts.
3154 /// \return the result of the helper wrapped in a RResultPtr.
3155 ///
3156 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
3157 /// Helper object provided by the caller. The required interface for the helper is described below (more
3158 /// methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful):
3159 ///
3160 /// ### Mandatory interface
3161 ///
3162 /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>`
3163 /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible.
3164 /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged.
3165 /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type
3166 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
3167 /// _before_ Initialize(), because the RResultPtr is constructed before the event loop is started.
3168 /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations.
3169 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
3170 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
3171 /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event
3172 /// loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader
3173 /// that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations;
3174 /// it should not usually serve any purpose for the Helper. This method is often no-op for simple helpers.
3175 /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method
3176 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
3177 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
3178 /// the requested columns for the particular entry being processed.
3179 /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
3180 /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in
3181 /// diagnostics, SaveGraph(), etc.
3182 ///
3183 /// ### Optional methods
3184 ///
3185 /// If these methods are implemented they enable extra functionality as per the description below.
3186 ///
3187 /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'.
3188 /// Different threads might call this method concurrently, but will do so with different 'slot' numbers.
3189 /// RDataFrame leverages this method to implement RResultPtr::OnPartialResult().
3190 /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the
3191 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
3192 /// of every sample, as in DefinePerSample().
3193 /// * `Helper MakeNew(void *newResult, std::string_view variation = "nominal")`: if implemented, it enables varying
3194 /// the action's result with VariationsFor(). It takes a type-erased new result that can be safely cast to a
3195 /// `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should be used as the action's output result.
3196 /// The function optionally takes the name of the current variation which could be useful in customizing its behaviour.
3197 ///
3198 /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled
3199 /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter.
3200 ///
3201 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
3202 ///
3203 /// ### Examples
3204 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.
3205 ///
3206 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
3207 ///
3208 // clang-format on
3209 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
3211 { // NOTE(review): the signature (listing line 3210) is absent from this extract
3212 using HelperT = std::decay_t<Helper>; // strip references and cv-qualifiers from the deduced helper type
3213 // TODO add more static sanity checks on Helper
3215 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value, // the pointer-convertibility test additionally rejects non-public inheritance (AH is declared on listing line 3214, absent from this extract)
3216 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
3217
3218 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper)); // copy or move the forwarded helper into shared ownership
3219 auto resPtr = hPtr->GetResultPtr(); // the helper's own result pointer, passed on to CreateAction below
3220
3221 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) { // neither explicit column types nor column names were supplied
3223 } else { // NOTE(review): the return for the branch above (listing line 3222) is absent from this extract
3224 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
3225 fProxiedPtr, columns.size());
3226 }
3227 }
3228
3229 ////////////////////////////////////////////////////////////////////////////
3230 /// \brief Provides a representation of the columns in the dataset.
3231 /// \tparam ColumnTypes variadic list of branch/column types.
3232 /// \param[in] columnList Names of the columns to be displayed.
3233 /// \param[in] nRows Number of events for each column to be displayed.
3234 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3235 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3236 ///
3237 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
3238 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
3239 /// return a complete version through `RDisplay::AsString()`.
3240 ///
3241 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
3242 /// RResultPtr.
3243 ///
3244 /// Example usage:
3245 /// ~~~{.cpp}
3246 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
3247 /// auto d1 = rdf.Display("");
3248 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
3249 /// auto d2 = rdf.Display({"x", "y"}, 128);
3250 /// // Printing the short representations, the event loop will run
3251 /// d1->Print();
3252 /// d2->Print();
3253 /// ~~~
// Display<ColumnTypes...>(): books a Display action for columns whose types are given as template arguments.
// NOTE(review): the declarator (listing line 3255) is missing from this listing; the member index at the end of
// this file lists
//   RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3254 template <typename... ColumnTypes>
3256 {
// NOTE(review): presumably rejects Display when implicit multi-threading is on -- confirm in CheckIMTDisabled.
3257 CheckIMTDisabled("Display");
// Work on a copy of the requested names and put the entry-number column in front of them.
3258 auto newCols = columnList;
3259 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
// The RDisplay object is used twice: as the action's result and, paired with nRows, as the helper argument.
3260 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3261 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3262 // Need to add ULong64_t type corresponding to the first column rdfentry_
3263 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
3264 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr);
3265 }
3266
3267 ////////////////////////////////////////////////////////////////////////////
3268 /// \brief Provides a representation of the columns in the dataset.
3269 /// \param[in] columnList Names of the columns to be displayed.
3270 /// \param[in] nRows Number of events for each column to be displayed.
3271 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3272 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3273 ///
3274 /// This overload automatically infers the column types.
3275 /// See the previous overloads for further details.
3276 ///
3277 /// Invoked when no types are specified to Display
// Display() overload without template arguments (column types are inferred, see the doc comment above).
// NOTE(review): the declarator (listing line 3278) and the first line of the return statement (3285) are missing
// from this listing; the member index at the end of this file lists the signature as
//   RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3279 {
// NOTE(review): presumably rejects Display when implicit multi-threading is on -- confirm in CheckIMTDisabled.
3280 CheckIMTDisabled("Display");
// Work on a copy of the requested names and put the entry-number column in front of them.
3281 auto newCols = columnList;
3282 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
// The RDisplay object is used twice: as the action's result and, paired with nRows, as the helper argument.
3283 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3284 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
// The final argument is the number of columns: the requested ones plus the inserted "rdfentry_".
3286 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr,
3287 columnList.size() + 1);
3288 }
3289
3290 ////////////////////////////////////////////////////////////////////////////
3291 /// \brief Provides a representation of the columns in the dataset.
3292 /// \param[in] columnNameRegexp A regular expression to select the columns.
3293 /// \param[in] nRows Number of events for each column to be displayed.
3294 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3295 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3296 ///
3297 /// The existing columns are matched against the regular expression. If the string provided
3298 /// is empty, all columns are selected.
3299 /// See the previous overloads for further details.
// Display() overload selecting columns by regular expression.
// NOTE(review): the return type (listing line 3300) and the rest of the body (3304-3305) are missing from this
// listing; the member index at the end of this file gives the return type as RResultPtr<RDisplay>.
3301 Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10)
3302 {
// All currently available column names; per the doc comment above these are what the regexp is matched against.
3303 const auto columnNames = GetColumnNames();
3306 }
3307
3308 ////////////////////////////////////////////////////////////////////////////
3309 /// \brief Provides a representation of the columns in the dataset.
3310 /// \param[in] columnList Names of the columns to be displayed.
3311 /// \param[in] nRows Number of events for each column to be displayed.
3312 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3313 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3314 ///
3315 /// See the previous overloads for further details.
// Display() overload taking a braced list of column names.
// NOTE(review): the return type (listing line 3316) and the whole body (3319-3320) are missing from this listing;
// the member index at the end of this file gives the return type as RResultPtr<RDisplay>.
3317 Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3318 {
3321 }
3322
3323private:
// DefineImpl(): shared implementation behind the Define/Redefine family for callables whose return type is
// default-constructible (the enable_if below removes this overload otherwise).
// `where` is the name of the calling API; it is used to tell Redefine-like callers apart from Define-like ones.
// NOTE(review): the template header (listing line 3324) and many statements of the body are missing from this
// listing (3329-3331, 3333-3335, 3339, 3341, 3346-3347, 3350, 3354, 3358, 3360, 3362, 3365). Names such as
// RetType, DefineType, ColTypesTmp_t, ColTypes_t, validColumnNames, retTypeName, demangledType, NewCol_t,
// newCols and newInterface are introduced on those missing lines.
3325 std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
3326 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
3327 {
// Compare only the first 8 characters, so every caller name that starts with "Redefine" takes the else branch.
3328 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
3332 } else {
3336 }
3337
// Argument types of the user callable. The two conditional aliases below (first lines missing) depend on whether
// DefineType is the Slot or SlotAndEntry flavour.
// NOTE(review): presumably they strip the leading slot / entry parameters -- confirm against the real header.
3338 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
3340 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type;
3342 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type;
3343
3344 constexpr auto nColumns = ColTypes_t::list_size;
3345
3348
3349 // Declare return type to the interpreter, for future use by jitted actions
3351 if (retTypeName.empty()) {
3352 // The type is not known to the interpreter.
3353 // We must not error out here, but if/when this column is used in jitted code
// Use a deliberately invalid, recognisable type name instead of failing right away.
3355 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3356 }
3357
// Build the new column node from the forwarded callable (the end of this statement is on missing line 3360).
3359 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
3361
// Register the new column in `newCols` (declared on missing line 3362).
3363 newCols.AddDefine(std::move(newColumn));
3364
3366
3367 return newInterface;
3368 }
3369
3370 // This overload is chosen when the callable passed to Define or DefineSlot returns void.
3371 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
3372 // this way compilation of `Define` has no way to continue after throwing the error.
// More precisely, the enable_if below selects this overload whenever F is not convertible to std::string and
// RetType is not default-constructible; a void return type is one such case.
// NOTE(review): the first line of the template header (listing line 3373), which declares F and RetType, is missing
// from this listing.
3374 bool IsFStringConv = std::is_convertible<F, std::string>::value,
3375 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3376 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3377 DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
3378 {
// This assertion is the whole point of the overload: it reports the problem with a readable message.
3379 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3380 "Error in `Define`: type returned by expression is not default-constructible");
3381 return *this; // never reached
3382 }
3383
3384 ////////////////////////////////////////////////////////////////////////////
3385 /// \brief Implementation of cache.
// CacheImpl(): builds a new RInterface<RLoopManager> backed by an RLazyDS over the requested columns.
// ColTypes are the column types; S is an index pack used to walk the column list in step with ColTypes.
// NOTE(review): the declarator (listing line 3387) and two statements (3389, 3396) are missing from this listing;
// `columnListWithoutSizeColumns` is declared on one of them. The member index at the end of this file gives
// the signature as
//   RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3386 template <typename... ColTypes, std::size_t... S>
3388 {
3390
3391 // Check at compile time that the columns types are copy constructible
3392 constexpr bool areCopyConstructible =
3393 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
3394 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3395
3397
// Book one Take action per column, then pair each column name with its Take result to build the data source.
3398 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3399 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3400 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3401
// The new dataframe gets its own RLoopManager, which takes ownership of the data source.
3402 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3403
3404 return cachedRDF;
3405 }
3406
// VaryImpl(): shared implementation behind the Vary overloads that take a C++ callable.
// IsSingleColumn is forwarded to RVariation as a template argument.
// NOTE(review): the return type (listing line 3408) and several statements (3417, 3419-3420, 3422, 3426, 3432,
// 3434, 3437) are missing from this listing; retTypeName, demangledType, newCols and newInterface are declared
// on those lines. The member index at the end of this file gives the return type as RInterface<Proxied, DS_t>.
3407 template <bool IsSingleColumn, typename F>
3409 VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
3410 const std::vector<std::string> &variationTags, std::string_view variationName)
3411 {
// F is a forwarding-reference parameter; decay it before inspecting the callable's signature.
3412 using F_t = std::decay_t<F>;
3413 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
3414 using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
3415 constexpr auto nColumns = ColTypes_t::list_size;
3416
3418
3421
3423 if (retTypeName.empty()) {
3424 // The type is not known to the interpreter, but we don't want to error out
3425 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
3427 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3428 }
3429
// Build the variation node from the forwarded callable (the end of this statement is on missing line 3432).
3430 auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>(
3431 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
3433
// Register the variation in `newCols` (declared on missing line 3434).
3435 newCols.AddVariation(std::move(variation));
3436
3438
3439 return newInterface;
3440 }
3441
// JittedVaryImpl(): shared implementation behind the Vary overloads that take the expression as a string.
// Throws std::logic_error if the same column name appears more than once in colNames.
// NOTE(review): several statements are missing from this listing (3451-3453, 3455, 3466-3467, 3469, 3472):
// the body of the per-column loop, the right-hand side that initialises `jittedVariation`, and the declarations
// of `newColRegister` and `newInterface`.
3442 RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression,
3443 const std::vector<std::string> &variationTags,
3444 std::string_view variationName, bool isSingleColumn)
3445 {
// Preconditions. R__ASSERT reports a fatal error when its condition is false (see TError.h).
3446 R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
3447 R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
3448 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
3449
// Per-column processing; the loop body is missing from this listing.
3450 for (auto &colName : colNames) {
3454 }
3456
3457 // when varying multiple columns, they must be different columns
3458 if (colNames.size() > 1) {
// A set drops duplicates, so a size mismatch means at least one name was repeated.
3459 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
3460 if (uniqueCols.size() != colNames.size())
3461 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
3462 }
3463
// The current node is upcast to its base node type and wrapped for use by the jitted code.
// NOTE(review): ownership/lifetime of upcastNodeOnHeap cannot be determined from here -- see MakeSharedOnHeap.
3464 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3465 auto jittedVariation =
3468
// Register the variation in `newColRegister` (declared on missing line 3469).
3470 newColRegister.AddVariation(std::move(jittedVariation));
3471
3473
3474 return newInterface;
3475 }
3476
// Overload used when an action may legitimately be booked without input columns.
// The trailing return type makes this overload viable only if `hPtr->Exec(0u)` is a valid expression, i.e. the
// helper's Exec can be called with nothing but an unsigned slot number.
// NOTE(review): the third parameter (listing line 3480) and the body (3483) are missing from this listing; the
// member index at the end of this file lists the third parameter as TTraits::TypeList<RDFDetail::RInferredType>.
3477 template <typename Helper, typename ActionResultType>
3478 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3479 const std::shared_ptr<Helper> &hPtr,
3481 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3482 {
3484 }
3485
// Catch-all overload: accepts any trailing arguments and always throws std::logic_error, naming the helper type
// through typeid (the name is implementation-defined and may be mangled).
// NOTE(review): the return-type line (listing line 3487) is missing from this listing; the member index at the
// end of this file gives it as RResultPtr<ActionResultType>.
3486 template <typename Helper, typename ActionResultType, typename... Others>
3488 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3489 const std::shared_ptr<Helper>& /*hPtr*/,
3490 Others...)
3491 {
3492 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3493 "columns! The action helper type was ") +
3494 typeid(Helper).name());
// Never executed: the throw above always leaves the function.
3495 return {};
3496 }
3497
3498protected:
// Protected constructor taking the proxied node, the loop manager and (per the member index at the end of this
// file) a `const RDFInternal::RColumnRegister &colRegister`.
// NOTE(review): the last parameter and the member-initializer list (listing lines 3500-3501) are missing from
// this listing; the constructor body itself is empty.
3499 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
3502 {
3503 }
3504
3505 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3506};
3507
3508} // namespace RDF
3509
3510} // namespace ROOT
3511
3512#endif // ROOT_RDF_TINTERFACE
#define R__LOG_INFO(...)
Definition RLogger.hxx:359
#define f(i)
Definition RSha256.hxx:104
#define h(i)
Definition RSha256.hxx:106
Basic types used by ROOT and required by TInterpreter.
unsigned int UInt_t
Unsigned integer 4 bytes (unsigned int)
Definition RtypesCore.h:60
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
unsigned long long ULong64_t
Portable unsigned long integer 8 bytes.
Definition RtypesCore.h:84
#define X(type, name)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
R__EXTERN TEnv * gEnv
Definition TEnv.h:170
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Definition TError.h:125
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:252
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
Base class for action helpers, see RInterface::Book() for more information.
implementation of FilterAvailable and FilterMissing operations
The head node of a RDF computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
Definition RAction.hxx:53
A binder for user-defined columns, variations and aliases.
std::vector< std::string_view > GenerateColumnNames() const
Return the list of the names of the defined columns (Defines + Aliases).
RDFDetail::RDefineBase * GetDefine(std::string_view colName) const
Return the RDefine for the requested column name, or nullptr.
The dataset specification for RDataFrame.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
ColumnNames_t GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::shared_ptr< ROOT::Detail::RDF::RLoopManager > fLoopManager
The RLoopManager at the root of this computation graph. Never null.
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const std::shared_ptr< RDFNode > &proxiedPtr, const int=-1)
Create RAction object, return RResultPtr for the action Overload for the case in which all column typ...
RDataSource * GetDataSource() const
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
void CheckIMTDisabled(std::string_view callerName)
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RDFDetail::RLoopManager * GetLoopManager() const
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
The public interface to the RDataFrame federation of classes.
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &colRegister)
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree or RNTuple treename in file filename.
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for a single existing column using auto-generated variation tags.
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for a single existing column using auto-generated variation tags.
RResultPtr<::TGraph > Graph(std::string_view x="", std::string_view y="")
Fill and return a TGraph object (lazy action).
RResultPtr<::THnSparseD > HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList)
Fill and return a sparse N-dimensional histogram (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
RInterface & operator=(RInterface &&)=default
Move-assignment operator for RInterface.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for multiple existing columns using auto-generated tags.
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for a single existing column using custom variation tags.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for multiple existing columns using auto-generated variation tags.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for multiple existing columns using custom variation tags.
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< RLoopManager > &proxied)
Build a RInterface from a RLoopManager.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree or RNTuple treename in file filename.
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow to refer to a column with a different name.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
RInterface< Proxied, DS_t > VaryImpl(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RDFDetail::RFilterWithMissingValues< Proxied >, DS_t > FilterAvailable(std::string_view column)
Discard entries with missing values.
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for a single existing column using custom variation tags.
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for multiple existing columns using custom variation tags.
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for multiple existing columns using custom variation tags.
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RInterface< RDFDetail::RFilterWithMissingValues< Proxied >, DS_t > FilterMissing(std::string_view column)
Keep only the entries that have missing values.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RInterface< Proxied, DS_t > JittedVaryImpl(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName, bool isSingleColumn)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RResultPtr<::THnSparseD > HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList)
Fill and return a sparse N-dimensional histogram (lazy action).
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for multiple existing columns using auto-generated variation tags.
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
R__DEPRECATED(6, 40, "Snapshot does not need template arguments anymore, you can safely remove them from this function call.") RResultPtr< RInterface< RLoopManager > > Snapshot(std
Save selected columns to disk, in a new TTree or RNTuple treename in file filename.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, size_t nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr<::TGraphAsymmErrors > GraphAsymmErrors(std::string_view x="", std::string_view y="", std::string_view exl="", std::string_view exh="", std::string_view eyl="", std::string_view eyh="")
Fill and return a TGraphAsymmErrors object (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree or RNTuple treename in file filename.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
RInterface< Proxied, DS_t > DefaultValueFor(std::string_view column, const T &defaultValue)
In case the value in the given column is missing, provide a default value.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::initializer_list< std::string > colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for multiple existing columns using auto-generated variation tags.
A RDataSource implementation which is built on top of result proxies.
Smart pointer for the return type of actions.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Change the verbosity level (global or specific to the RLogChannel passed to the constructor) for the ...
Definition RLogger.hxx:240
const_iterator begin() const
const_iterator end() const
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
virtual Int_t GetValue(const char *name, Int_t dflt) const
Returns the integer value for a resource.
Definition TEnv.cxx:505
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
@ kAllAxes
Definition TH1.h:126
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
ROOT::RLogChannel & RDFLogChannel()
Definition RDFUtils.cxx:42
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister)
Throw if the column has systematic variations attached.
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition RDFUtils.cxx:73
void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair< ULong64_t, ULong64_t > &&newRange)
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
void CheckValidCppVarName(std::string_view var, const std::string &where)
void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec)
Changes the input dataset specification of an RDataFrame.
const std::vector< std::string > & GetTopLevelFieldNames(const ROOT::RDF::RDataSource &ds)
Definition RDFUtils.cxx:637
void RemoveDuplicates(ColumnNames_t &columnNames)
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition RDFUtils.cxx:178
void CheckSnapshotOptionsFormatCompatibility(const ROOT::RDF::RSnapshotOptions &opts)
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
std::string GetDataSourceLabel(const ROOT::RDF::RNode &node)
std::string PrettyPrintAddr(const void *const addr)
void TriggerRun(ROOT::RDF::RNode node)
Trigger the execution of an RDataFrame computation graph.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
std::string DemangleTypeIdName(const std::type_info &typeInfo)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
std::pair< std::vector< std::string >, std::vector< std::string > > AddSizeBranches(ROOT::RDF::RDataSource *ds, std::vector< std::string > &&colsWithoutAliases, std::vector< std::string > &&colsWithAliases)
Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array b...
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2RVec=true)
Return a string containing the type of the given branch.
Definition RDFUtils.cxx:317
void RemoveRNTupleSubFields(ColumnNames_t &columnNames)
void SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline)
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting with '#' filtered out.
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap, bool isSingleColumn)
Book the jitting of a Vary call.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &colRegister, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &colRegister, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end)
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
std::vector< std::string > ColumnNames_t
ROOT type_traits extensions.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition TROOT.cxx:544
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition TROOT.cxx:600
@ kInfo
Informational messages; used for instance for tracing.
@ kError
An error.
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition TROOT.cxx:586
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition Utils.hxx:153
Tag to let data sources use the native data type when creating a column reader.
Definition Utils.hxx:344
A collection of options to steer the creation of the dataset on disk through Snapshot().
A struct which stores some basic parameters of a TH1D.
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores some basic parameters of a TH2D.
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores some basic parameters of a TH3D.
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores some basic parameters of a THnD.
std::shared_ptr<::THnD > GetHistogram() const
A struct which stores some basic parameters of a THnSparseD.
std::shared_ptr<::THnSparseD > GetHistogram() const
A struct which stores some basic parameters of a TProfile.
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores some basic parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.