Logo ROOT  
Reference Guide
RInterface.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2
3/*************************************************************************
4 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDF_TINTERFACE
12#define ROOT_RDF_TINTERFACE
13
14#include "ROOT/InternalTreeUtils.hxx" // for GetFileNamesFromTree and GetFriendInfo
15#include "ROOT/RDataSource.hxx"
20#include "ROOT/RDF/RDefine.hxx"
22#include "ROOT/RDF/RFilter.hxx"
26#include "ROOT/RDF/RRange.hxx"
27#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/RResultPtr.hxx"
32#include "ROOT/RStringView.hxx"
33#include "ROOT/RVec.hxx"
34#include "ROOT/TypeTraits.hxx"
35#include "RtypesCore.h" // for ULong64_t
36#include "TChain.h" // for checking fLoopManager->GetTree() return type
37#include "TDirectory.h"
38#include "TH1.h" // For Histo actions
39#include "TH2.h" // For Histo actions
40#include "TH3.h" // For Histo actions
41#include "THn.h"
42#include "TProfile.h"
43#include "TProfile2D.h"
44#include "TStatistic.h"
45
46#include <algorithm>
47#include <cstddef>
48#include <initializer_list>
49#include <iterator> // std::back_inserter
50#include <limits>
51#include <memory>
52#include <set>
53#include <sstream>
54#include <stdexcept>
55#include <string>
56#include <type_traits> // is_same, enable_if
57#include <typeinfo>
58#include <unordered_set>
59#include <utility> // std::index_sequence
60#include <vector>
61
62class TGraph;
63
64// Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
65namespace ROOT {
68void EnableImplicitMT(UInt_t numthreads);
69class RDataFrame;
70namespace Internal {
71namespace RDF {
73}
74} // namespace Internal
75} // namespace ROOT
76namespace cling {
77std::string printValue(ROOT::RDataFrame *tdf);
78}
79
80namespace ROOT {
81namespace RDF {
84namespace TTraits = ROOT::TypeTraits;
85
86template <typename Proxied, typename DataSource>
87class RInterface;
88
89using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
90
91// clang-format off
92/**
93 * \class ROOT::RDF::RInterface
94 * \ingroup dataframe
95 * \brief The public interface to the RDataFrame federation of classes.
96 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
97 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
98 *
99 * The documentation of each method features a one liner illustrating how to use the method, for example showing how
100 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
101 */
102// clang-format on
103template <typename Proxied, typename DataSource = void>
105 using DS_t = DataSource;
109 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
111
112 template <typename T, typename W>
113 friend class RInterface;
114
115 friend void RDFInternal::TriggerRun(RNode &node);
116
117 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
118 ///< The RLoopManager at the root of this computation graph. Never null.
120 /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
122
123 /// Contains the columns defined up to this node.
125
/// \brief Build a human-readable description of the dataset this dataframe reads from.
/// \return A string whose content depends on the kind of input:
///  - TTree/TChain: "Dataframe from <TTree|TChain> <name>", followed either by " (in-memory)" or by the
///    name(s) of the file(s), and then, if the tree has friends, by one entry per friend;
///  - data source: "Dataframe from datasource <label>";
///  - neither of the above: "Empty dataframe filling <n> row(s)".
126 std::string DescribeDataset() const
127 {
128 // Case 1: TTree/TChain as input
129 const auto tree = fLoopManager->GetTree();
130 if (tree) {
131 const auto treeName = tree->GetName();
132 const auto isTChain = dynamic_cast<TChain *>(tree) ? true : false; // the dynamic type decides the label printed below
133 const auto treeType = isTChain ? "TChain" : "TTree";
134 const auto isInMemory = !isTChain && !tree->GetCurrentFile() ? true : false; // a non-chain tree without a current file
135 const auto friendInfo = ROOT::Internal::TreeUtils::GetFriendInfo(*tree);
136 const auto hasFriends = friendInfo.fFriendNames.empty() ? false : true;
137 std::stringstream ss;
138 ss << "Dataframe from " << treeType << " " << treeName;
139 if (isInMemory) {
140 ss << " (in-memory)";
141 } else {
 // NOTE(review): the declaration of `files` (listing line 142) is absent from this listing; presumably it is
 // obtained from the tree via GetFileNamesFromTree (see the include comment at the top) -- confirm against
 // the original header.
143 const auto numFiles = files.size();
144 if (numFiles == 1) {
145 ss << " in file " << files[0];
146 } else {
147 ss << " in files\n";
148 for (auto i = 0u; i < numFiles; i++) {
149 ss << " " << files[i];
150 if (i < numFiles - 1) // newline between entries, none after the last one
151 ss << '\n';
152 }
153 }
154 }
155 if (hasFriends) {
156 const auto numFriends = friendInfo.fFriendNames.size();
 // Singular/plural heading depending on the number of friends.
157 if (numFriends == 1) {
158 ss << "\nwith friend\n";
159 } else {
160 ss << "\nwith friends\n";
161 }
 // The three containers of friendInfo are indexed in parallel, one entry per friend.
162 for (auto i = 0u; i < numFriends; i++) {
163 const auto nameAlias = friendInfo.fFriendNames[i];
164 const auto files = friendInfo.fFriendFileNames[i];
165 const auto numFiles = files.size();
166 const auto subnames = friendInfo.fFriendChainSubNames[i];
167 ss << " " << nameAlias.first;
 // The second element of the pair is printed in parentheses only when it differs from the first
 // (presumably name vs. alias of the friend -- confirm against GetFriendInfo).
168 if (nameAlias.first != nameAlias.second)
169 ss << " (" << nameAlias.second << ")";
170 // case: TTree as friend (exactly one file)
171 if (numFiles == 1) {
172 ss << " " << files[0];
173 }
174 // case: TChain as friend (one "subname file" line per entry)
175 else {
176 ss << '\n';
177 for (auto j = 0u; j < numFiles; j++) {
178 ss << " " << subnames[j] << " " << files[j];
179 if (j < numFiles - 1)
180 ss << '\n';
181 }
182 }
183 if (i < numFriends - 1) // newline between friends, none after the last one
184 ss << '\n';
185 }
186 }
187 return ss.str();
188 }
189 // Case 2: data source as input
190 else if (fDataSource) {
191 const auto datasourceLabel = fDataSource->GetLabel();
192 return "Dataframe from datasource " + datasourceLabel;
193 }
194 // Case 3: trivial/empty data source; only the number of entries reported by the loop manager is known
195 else {
196 const auto n = fLoopManager->GetNEmptyEntries();
197 if (n == 1) {
198 return "Empty dataframe filling 1 row";
199 } else {
200 return "Empty dataframe filling " + std::to_string(n) + " rows";
201 }
202 }
203 }
204
205public:
206 ////////////////////////////////////////////////////////////////////////////
207 /// \brief Copy-assignment operator for RInterface.
208 RInterface &operator=(const RInterface &) = default;
209
210 ////////////////////////////////////////////////////////////////////////////
211 /// \brief Copy-ctor for RInterface.
212 RInterface(const RInterface &) = default;
213
214 ////////////////////////////////////////////////////////////////////////////
215 /// \brief Move-ctor for RInterface.
216 RInterface(RInterface &&) = default;
217
218 ////////////////////////////////////////////////////////////////////////////
219 /// \brief Move-assignment operator for RInterface.
221
222 ////////////////////////////////////////////////////////////////////////////
223 /// \brief Build a RInterface from a RLoopManager.
224 /// This constructor is only available for RInterface<RLoopManager>.
226 RInterface(const std::shared_ptr<RLoopManager> &proxied)
227 : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource()), fColRegister(proxied)
228 {
230 }
231
232 ////////////////////////////////////////////////////////////////////////////
233 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
234 /// Different RDataFrame methods return different C++ types. All nodes, however,
235 /// can be cast to this common type at the cost of a small performance penalty.
236 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
237 /// around via (non-template, C++11) helper functions.
238 /// Example usage:
239 /// ~~~{.cpp}
240 /// // a function that conditionally adds a Range to a RDataFrame node.
241 /// RNode MaybeAddRange(RNode df, bool mustAddRange)
242 /// {
243 /// return mustAddRange ? df.Range(1) : df;
244 /// }
245 /// // use as :
246 /// ROOT::RDataFrame df(10);
247 /// auto maybeRanged = MaybeAddRange(df, true);
248 /// ~~~
249 /// Note that it is not a problem to pass RNode's by value.
250 operator RNode() const
251 {
252 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister,
254 }
255
256 ////////////////////////////////////////////////////////////////////////////
257 /// \brief Append a filter to the call graph.
258 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
259 /// signalling whether the event has passed the selection (true) or not (false).
260 /// \param[in] columns Names of the columns/branches in input to the filter function.
261 /// \param[in] name Optional name of this filter. See `Report`.
262 /// \return the filter node of the computation graph.
263 ///
264 /// Append a filter node at the point of the call graph corresponding to the
265 /// object this method is called on.
266 /// The callable `f` should not have side-effects (e.g. modification of an
267 /// external or static variable) to ensure correct results when implicit
268 /// multi-threading is active.
269 ///
270 /// RDataFrame only evaluates filters when necessary: if multiple filters
271 /// are chained one after another, they are executed in order and the first
272 /// one returning false causes the event to be discarded.
273 /// Even if multiple actions or transformations depend on the same filter,
274 /// it is executed once per entry. If its result is requested more than
275 /// once, the cached result is served.
276 ///
277 /// ### Example usage:
278 /// ~~~{.cpp}
279 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
280 /// auto filtered = df.Filter(myCut, {"x", "y"});
281 ///
282 /// // String: it must contain valid C++ except that column names can be used instead of variable names
283 /// auto filtered = df.Filter("x*y > 0");
284 /// ~~~
287 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
288 {
289 RDFInternal::CheckFilter(f);
290 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
291 constexpr auto nColumns = ColTypes_t::list_size;
292 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
293 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
294
296
297 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
298 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister, fDataSource);
299 }
300
301 ////////////////////////////////////////////////////////////////////////////
302 /// \brief Append a filter to the call graph.
303 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
304 /// signalling whether the event has passed the selection (true) or not (false).
305 /// \param[in] name Optional name of this filter. See `Report`.
306 /// \return the filter node of the computation graph.
307 ///
308 /// Refer to the first overload of this method for the full documentation.
311 {
312 // The sfinae is there in order to pick up the overloaded method which accepts two strings
313 // rather than this template method.
314 return Filter(f, {}, name);
315 }
316
317 ////////////////////////////////////////////////////////////////////////////
318 /// \brief Append a filter to the call graph.
319 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
320 /// signalling whether the event has passed the selection (true) or not (false).
321 /// \param[in] columns Names of the columns/branches in input to the filter function.
322 /// \return the filter node of the computation graph.
323 ///
324 /// Refer to the first overload of this method for the full documentation.
325 template <typename F>
326 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
327 {
 // Convenience overload for braced lists of column names: build a ColumnNames_t from the
 // initializer list and delegate to the Filter overload that takes the column names in that form.
328 return Filter(f, ColumnNames_t{columns});
329 }
330
331 ////////////////////////////////////////////////////////////////////////////
332 /// \brief Append a filter to the call graph.
333 /// \param[in] expression The filter expression in C++
334 /// \param[in] name Optional name of this filter. See `Report`.
335 /// \return the filter node of the computation graph.
336 ///
337 /// The expression is just-in-time compiled and used to filter entries. It must
338 /// be valid C++ syntax in which variable names are substituted with the names
339 /// of branches/columns.
340 ///
341 /// ### Example usage:
342 /// ~~~{.cpp}
343 /// auto filtered_df = df.Filter("myCollection.size() > 3");
344 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
345 /// ~~~
347 {
348 // deleted by the jitted call to JitFilterHelper
349 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
350 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
351 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister, fDataSource);
352 const auto jittedFilter =
355
358 }
359
360 // clang-format off
361 ////////////////////////////////////////////////////////////////////////////
362 /// \brief Define a new column.
363 /// \param[in] name The name of the defined column.
364 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
365 /// \param[in] columns Names of the columns/branches in input to the producer function.
366 /// \return the first node of the computation graph for which the new quantity is defined.
367 ///
368 /// Define a column that will be visible from all subsequent nodes
369 /// of the functional chain. The `expression` is only evaluated for entries that pass
370 /// all the preceding filters.
371 /// A new variable is created called `name`, accessible as if it was contained
372 /// in the dataset from subsequent transformations/actions.
373 ///
374 /// Use cases include:
375 /// * caching the results of complex calculations for easy and efficient multiple access
376 /// * extraction of quantities of interest from complex objects
377 ///
378 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
379 ///
380 /// ### Example usage:
381 /// ~~~{.cpp}
382 /// // assuming a function with signature:
383 /// double myComplexCalculation(const RVec<float> &muon_pts);
384 /// // we can pass it directly to Define
385 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
386 /// // alternatively, we can pass the body of the function as a string, as in Filter:
387 /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
388 /// ~~~
391 {
392 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Define");
393 }
394 // clang-format on
395
396 // clang-format off
397 ////////////////////////////////////////////////////////////////////////////
398 /// \brief Define a new column with a value dependent on the processing slot.
399 /// \param[in] name The name of the defined column.
400 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
401 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
402 /// \return the first node of the computation graph for which the new quantity is defined.
403 ///
404 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
405 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
406 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
407 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
408 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
409 ///
410 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
411 /// ~~~{.cpp}
412 /// int function(unsigned int, double, double);
413 /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
414 /// df.DefineSlot("x", function, {"column1", "column2"})
415 /// ~~~
416 ///
417 /// See Define for more information.
418 template <typename F>
420 {
421 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "DefineSlot");
422 }
423 // clang-format on
424
425 // clang-format off
426 ////////////////////////////////////////////////////////////////////////////
427 /// \brief Define a new column with a value dependent on the processing slot and the current entry.
428 /// \param[in] name The name of the defined column.
429 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
430 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
431 /// \return the first node of the computation graph for which the new quantity is defined.
432 ///
433 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
434 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
435 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
436 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
437 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
438 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
439 ///
440 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
441 /// ~~~{.cpp}
442 /// int function(unsigned int, ULong64_t, double, double);
443 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
444 /// DefineSlotEntry("x", function, {"column1", "column2"})
445 /// ~~~
446 ///
447 /// See Define for more information.
448 template <typename F>
450 {
451 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
452 "DefineSlotEntry");
453 }
454 // clang-format on
455
456 ////////////////////////////////////////////////////////////////////////////
457 /// \brief Define a new column.
458 /// \param[in] name The name of the defined column.
459 /// \param[in] expression An expression in C++ which represents the defined value
460 /// \return the first node of the computation graph for which the new quantity is defined.
461 ///
462 /// The expression is just-in-time compiled and used to produce the column entries.
463 /// It must be valid C++ syntax in which variable names are substituted with the names
464 /// of branches/columns.
465 ///
466 /// Refer to the first overload of this method for the full documentation.
468 {
469 constexpr auto where = "Define";
471 // these checks must be done before jitting lest we throw exceptions in jitted code
474
475 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
476 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
477 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
478
480 newCols.AddDefine(jittedDefine);
481
482 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
483
484 return newInterface;
485 }
486
487 ////////////////////////////////////////////////////////////////////////////
488 /// \brief Overwrite the value and/or type of an existing column.
489 /// \param[in] name The name of the column to redefine.
490 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
491 /// \param[in] columns Names of the columns/branches in input to the expression.
492 /// \return the first node of the computation graph for which the quantity is redefined.
493 ///
494 /// The old value of the column can be used as an input for the expression.
495 ///
496 /// An exception is thrown in case the column to redefine does not already exist.
497 /// See Define() for more information.
500 {
501 return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns, "Redefine");
502 }
503
504 // clang-format off
505 ////////////////////////////////////////////////////////////////////////////
506 /// \brief Overwrite the value and/or type of an existing column.
507 /// \param[in] name The name of the column to redefine.
508 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
509 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
510 /// \return the first node of the computation graph for which the new quantity is defined.
511 ///
512 /// The old value of the column can be used as an input for the expression.
513 /// An exception is thrown in case the column to redefine does not already exist.
514 ///
515 /// See DefineSlot() for more information.
516 // clang-format on
517 template <typename F>
519 {
520 return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns, "RedefineSlot");
521 }
522
523 // clang-format off
524 ////////////////////////////////////////////////////////////////////////////
525 /// \brief Overwrite the value and/or type of an existing column.
526 /// \param[in] name The name of the column to redefine.
527 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
528 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
529 /// \return the first node of the computation graph for which the new quantity is defined.
530 ///
531 /// The old value of the column can be used as an input for the expression.
532 /// An exception is thrown in case the column to re-define does not already exist.
533 ///
534 /// See DefineSlotEntry() for more information.
535 // clang-format on
536 template <typename F>
538 {
539 return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns,
540 "RedefineSlotEntry");
541 }
542
543 ////////////////////////////////////////////////////////////////////////////
544 /// \brief Overwrite the value and/or type of an existing column.
545 /// \param[in] name The name of the column to redefine.
546 /// \param[in] expression An expression in C++ which represents the defined value
547 /// \return the first node of the computation graph for which the new quantity is defined.
548 ///
549 /// The expression is just-in-time compiled and used to produce the column entries.
550 /// It must be valid C++ syntax in which variable names are substituted with the names
551 /// of branches/columns.
552 ///
553 /// The old value of the column can be used as an input for the expression.
554 /// An exception is thrown in case the column to re-define does not already exist.
555 ///
556 /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
558 {
559 constexpr auto where = "Redefine";
564
565 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
566 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
567 fLoopManager->GetBranchNames(), upcastNodeOnHeap);
568
570 newCols.AddDefine(jittedDefine);
571
572 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
573
574 return newInterface;
575 }
576
577 // clang-format off
578 ////////////////////////////////////////////////////////////////////////////
579 /// \brief Define a new column that is updated when the input sample changes.
580 /// \param[in] name The name of the defined column.
581 /// \param[in] expression A C++ callable that computes the new value of the defined column.
582 /// \return the first node of the computation graph for which the new quantity is defined.
583 ///
584 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
585 /// where:
586 /// - `T` is the type of the defined column
587 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
588 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
589 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
590 /// being processed (see the class docs for more information).
591 ///
592 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
593 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
594 /// starts rather than at every entry.
595 ///
596 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
597 ///
598 /// ### Example usage:
599 /// ~~~{.cpp}
600 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
601 /// df.DefinePerSample("weightbysample",
602 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
603 /// { return id.Contains("sample1") ? 1.0f : 2.0f; });
604 /// ~~~
605 // clang-format on
606 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
607 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
609 {
610 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
613
614 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
615 if (retTypeName.empty()) {
616 // The type is not known to the interpreter.
617 // We must not error out here: a placeholder type name is recorded instead, presumably so that the problem only surfaces if/when this column is used in jitted code.
618 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
619 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
620 }
621
622 auto newColumn =
623 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
624
625 auto updateDefinePerSample = [newColumn](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
626 newColumn->Update(slot, id);
627 };
628 fLoopManager->AddSampleCallback(std::move(updateDefinePerSample));
629
631 newCols.AddDefine(std::move(newColumn));
632 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
633 return newInterface;
634 }
635
636 // clang-format off
637 ////////////////////////////////////////////////////////////////////////////
638 /// \brief Define a new column that is updated when the input sample changes.
639 /// \param[in] name The name of the defined column.
640 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
641 /// \return the first node of the computation graph for which the new quantity is defined.
642 ///
643 /// The expression is just-in-time compiled and used to produce the column entries.
644 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
645 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
646 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
647 ///
648 /// ### Example usage:
649 /// ~~~{.py}
650 /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
651 /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
652 /// ~~~
653 ///
654 /// \note
655 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
656 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
657 /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
658 /// ~~~{.py}
659 /// ROOT.gInterpreter.Declare(
660 /// """
661 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
662 /// return id.Contains("sample1") ? 1.0f : 2.0f;
663 /// }
664 /// """)
665 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
666 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
667 /// ~~~
668 ///
669 /// \note
670 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
671 /// column names other than those mentioned above: the expression is evaluated once before the processing of the
672 /// sample even starts, so column values are not accessible.
673 // clang-format on
675 {
676 RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
677 // these checks must be done before jitting lest we throw exceptions in jitted code
680
681 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
682 auto jittedDefine =
683 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap);
684 auto updateDefinePerSample = [jittedDefine](unsigned int slot, const ROOT::RDF::RSampleInfo &id) {
685 jittedDefine->Update(slot, id);
686 };
687 fLoopManager->AddSampleCallback(std::move(updateDefinePerSample));
688
690 newCols.AddDefine(jittedDefine);
691
692 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
693
694 return newInterface;
695 }
696
697 /// \brief Register systematic variations for an existing column.
698 /// \param[in] colName name of the column for which varied values are provided.
699 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
700 /// take any column values as input, similarly to what happens with Filter and Define calls. It must
701 /// return an RVec of varied values, one for each variation tag, in the same order as the tags.
702 /// \param[in] inputColumns the names of the columns to be passed to the callable.
703 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
704 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
705 ///
706 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
707 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
708 /// results that depend on any varied quantity a map/dictionary of varied results can be produced with
709 /// ROOT::RDF::Experimental::VariationsFor (see the example below).
710 ///
711 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
712 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
713 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
714 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
715 ///
716 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
717 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
718 /// ~~~{.cpp}
719 /// auto nominal_hx =
720 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"down", "up"})
721 /// .Filter("pt > k")
722 /// .Define("x", someFunc, {"pt"})
723 /// .Histo1D("x");
724 ///
725 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
726 /// hx["nominal"].Draw();
727 /// hx["pt:down"].Draw("SAME");
728 /// ~~~
729 template <typename F>
730 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
731 const std::vector<std::string> &variationTags, std::string_view variationName = "")
732 {
733 std::vector<std::string> colNames{{std::string(colName)}};
734 const std::string theVariationName{variationName.empty() ? colName : variationName};
735
736 return Vary(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags, theVariationName);
737 }
738
739 /// \brief Register systematic variations for an existing columns using auto-generated variation tags.
740 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
741 /// will be auto-generated as the sequence 0...nVariations-1.
742 /// See the documentation of the previous overload for more information.
743 template <typename F>
744 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
745 std::size_t nVariations, std::string_view variationName = "")
746 {
747 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
748
749 std::vector<std::string> variationTags;
750 variationTags.reserve(nVariations);
751 for (std::size_t i = 0u; i < nVariations; ++i)
752 variationTags.emplace_back(std::to_string(i));
753
754 const std::string theVariationName{variationName.empty() ? colName : variationName};
755
756 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
757 }
758
759 /// \brief Register a systematic variation that affects multiple columns simultaneously.
/// \param[in] colNames names of the columns that are varied together.
/// \param[in] expression a callable that evaluates the varied values for all the specified columns.
/// \param[in] inputColumns the names of the columns to be passed to the callable.
/// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
/// \param[in] variationName a generic name for this set of varied values, e.g. "ptAndEta".
///
760 /// This overload of Vary takes a list of column names as first argument rather than a single name and
761 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
762 /// affected column.
763 /// See the documentation of the first Vary overload for more information.
764 ///
765 /// Example usage:
766 /// ~~~{.cpp}
767 /// // produce variations "ptAndEta:down" and "ptAndEta:up"
768 /// df.Vary({"pt", "eta"},
769 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
770 /// {"pt", "eta"}, {"down", "up"}, // input columns first, then the variation tags
771 /// "ptAndEta");
772 /// ~~~
773 template <typename F>
// NOTE(review): listing line 774 is absent from this dump. It presumably carried the return type
// (the other Vary overloads return RInterface<Proxied, DS_t>) -- confirm against the real header.
775 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
776 const std::vector<std::string> &variationTags, std::string_view variationName)
777 {
// Argument and return types are taken from the callable's signature via CallableTraits.
778 using F_t = std::decay_t<F>;
779 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
780 using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
// Number of entries in the callable's argument type list.
781 constexpr auto nColumns = ColTypes_t::list_size;
782
// Argument validation is delegated to SanityChecksForVary (defined elsewhere in the class).
783 SanityChecksForVary<RetType>(colNames, variationTags, variationName);
784
785 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
786 CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
787
788 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
789 if (retTypeName.empty()) {
790 // The type is not known to the interpreter, but we don't want to error out
791 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
792 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
793 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
794 }
795
// The variation node is built from the current column register (fColRegister).
796 auto variation = std::make_shared<RDFInternal::RVariation<F_t>>(
797 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
798 validColumnNames);
799
// NOTE(review): listing line 800 is absent from this dump. The declaration of `newCols` is expected there
// (presumably a copy of fColRegister that the new variation is added to) -- confirm against the real header.
801 newCols.AddVariation(variation);
802
// NOTE(review): spelled RInterface<Proxied> here, while the string-expression Vary overload constructs
// RInterface<Proxied, DS_t> -- confirm the omission of DS_t is intentional.
803 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
804
805 return newInterface;
806 }
807
808 /// \brief Register systematic variations for one or more existing columns using auto-generated tags.
/// \param[in] colNames names of the columns that are varied together.
/// \param[in] expression a callable that evaluates the varied values for the specified columns.
/// \param[in] inputColumns the names of the columns to be passed to the callable.
/// \param[in] nVariations number of variations; must be greater than zero (checked with R__ASSERT).
/// \param[in] variationName a generic name for this set of varied values.
///
809 /// This overload of Vary takes a nVariations parameter instead of a list of tag names. Tag names
810 /// will be auto-generated as the sequence 0...nVariations-1.
811 /// See the documentation of the previous overload for more information.
812 template <typename F>
// NOTE(review): listing line 813 is absent from this dump. It presumably carried the return type
// (the other Vary overloads return RInterface<Proxied, DS_t>) -- confirm against the real header.
814 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
815 std::size_t nVariations, std::string_view variationName)
816 {
817 R__ASSERT(nVariations > 0 && "Must have at least one variation.");
818
// Build the tags "0", "1", ..., nVariations-1.
819 std::vector<std::string> variationTags;
820 variationTags.reserve(nVariations);
821 for (std::size_t i = 0u; i < nVariations; ++i)
822 variationTags.emplace_back(std::to_string(i));
823
// Forward to the overload that takes explicit variation tags.
824 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
825 }
826
827 /// \brief Register systematic variations for an existing column.
828 /// \param[in] colName name of the column for which varied values are provided.
829 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
830 /// values for the specified column.
831 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
832 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
833 /// colName is used if none is provided.
834 ///
835 /// ~~~{.cpp}
836 /// auto nominal_hx =
837 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
838 /// .Filter("pt > k")
839 /// .Define("x", someFunc, {"pt"})
840 /// .Histo1D("x");
841 ///
842 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
843 /// hx["nominal"].Draw();
844 /// hx["pt:down"].Draw("SAME");
845 /// ~~~
// NOTE(review): listing line 846 is absent from this dump. It is expected to hold the start of the signature
// (return type, the name Vary and the leading parameters `colName` and `expression`, which the body uses)
// -- confirm against the real header.
847 const std::vector<std::string> &variationTags, std::string_view variationName = "")
848 {
// Wrap the single column name in a one-element list for the multi-column overload.
849 std::vector<std::string> colNames{{std::string(colName)}};
// An empty variation name falls back to the column name.
850 const std::string theVariationName{variationName.empty() ? colName : variationName};
851
852 return Vary(std::move(colNames), expression, variationTags, theVariationName);
853 }
854
855 /// \brief Register systematic variations for an existing column.
856 /// \param[in] colName name of the column for which varied values are provided.
857 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
858 /// values for the specified column.
859 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
860 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
861 /// colName is used if none is provided.
862 ///
863 /// See the documentation for the previous overload for more information.
864 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
865 std::string_view variationName = "")
866 {
867 std::vector<std::string> colNames{{std::string(colName)}};
868 const std::string theVariationName{variationName.empty() ? colName : variationName};
869
870 return Vary(std::move(colNames), expression, nVariations, theVariationName);
871 }
872
873 /// \brief Register systematic variations for one or more existing columns.
874 /// \param[in] colNames names of the columns for which varied values are provided.
875 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
876 /// values for the specified columns.
877 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be "0", "1", etc.
878 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
879 ///
880 /// ~~~{.cpp}
881 /// auto nominal_hx =
882 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
883 /// .Histo1D("x", "y");
884 ///
885 /// auto hx = ROOT::RDF::VariationsFor(nominal_hx);
886 /// hx["nominal"].Draw();
887 /// hx["xy:0"].Draw("SAME");
888 /// hx["xy:1"].Draw("SAME");
889 /// ~~~
890 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
891 std::size_t nVariations, std::string_view variationName)
892 {
893 std::vector<std::string> variationTags;
894 variationTags.reserve(nVariations);
895 for (std::size_t i = 0u; i < nVariations; ++i)
896 variationTags.emplace_back(std::to_string(i));
897
898 return Vary(colNames, expression, std::move(variationTags), variationName);
899 }
900
901 /// \brief Register systematic variations for one or more existing columns.
902 /// \param[in] colNames names of the columns for which varied values are provided.
903 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
904 /// values for the specified columns.
905 /// \param[in] variationTags names for each of the varied values, e.g. "up" and "down".
906 /// \param[in] variationName a generic name for this set of varied values, e.g. "ptvariation".
/// \throws std::logic_error if the same column name appears more than once in colNames.
907 ///
908 /// ~~~{.cpp}
909 /// auto nominal_hx =
910 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
911 /// .Histo1D("x", "y");
912 ///
913 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
914 /// hx["nominal"].Draw();
915 /// hx["xy:down"].Draw("SAME");
916 /// hx["xy:up"].Draw("SAME");
917 /// ~~~
918 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
919 const std::vector<std::string> &variationTags, std::string_view variationName)
920 {
// Preconditions: at least one tag, at least one column, and a non-empty variation name.
921 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
922 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
923 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
924
925 for (auto &colName : colNames) {
926 RDFInternal::CheckValidCppVarName(colName, "Vary");
// NOTE(review): listing lines 927-928 are absent from this dump; further per-column checks may live
// there -- confirm against the real header.
929 }
930 RDFInternal::CheckValidCppVarName(variationName, "Vary");
931
932 // when varying multiple columns, they must be different columns
933 if (colNames.size() > 1) {
// A set drops duplicates, so a size mismatch means some name was repeated.
934 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
935 if (uniqueCols.size() != colNames.size())
936 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
937 }
938
939 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
940 auto jittedVariation =
941 RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, fDataSource,
942 fColRegister, fLoopManager->GetBranchNames(), upcastNodeOnHeap);
943
// NOTE(review): listing line 944 is absent from this dump. The declaration of `newColRegister` is expected
// there (presumably a copy of fColRegister) -- confirm against the real header.
945 newColRegister.AddVariation(std::move(jittedVariation));
946
947 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister), fDataSource);
948
949 return newInterface;
950 }
951
952 ////////////////////////////////////////////////////////////////////////////
953 /// \brief Allow to refer to a column with a different name.
954 /// \param[in] alias name of the column alias
955 /// \param[in] columnName name of the column to be aliased
956 /// \return the first node of the computation graph for which the alias is available.
957 ///
958 /// Aliasing an alias is supported.
959 ///
960 /// ### Example usage:
961 /// ~~~{.cpp}
962 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
963 /// ~~~
// NOTE(review): listing line 964 is absent from this dump. It is expected to hold the signature
// (the body uses parameters `alias` and `columnName`) -- confirm against the real header.
965 {
966 // The symmetry with Define is clear. We want to:
967 // - Create globally the alias and return this very node, unchanged
968 // - Make aliases accessible based on chains and not globally
969
970 // Helper to find out if a name is a column
// NOTE(review): dsColumnNames is not used by any line visible in this dump; presumably it is consumed by
// the dropped lines below -- confirm.
971 auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
972
973 constexpr auto where = "Alias";
// NOTE(review): listing line 974 is absent from this dump.
975 // If the alias name is a column name, there is a problem
// NOTE(review): listing line 976, the statement the comment above refers to, is absent from this dump.
977
// Validate the single target column name and take the one resulting entry.
978 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
979
// NOTE(review): listing line 980 is absent from this dump. The declaration of `newCols` is expected there
// (presumably a copy of fColRegister) -- confirm against the real header.
981 newCols.AddAlias(alias, validColumnName);
982
// The alias lives in the column register handed to the returned interface.
983 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
984
985 return newInterface;
986 }
987
988 ////////////////////////////////////////////////////////////////////////////
989 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
990 /// \tparam ColumnTypes variadic list of branch/column types.
991 /// \param[in] treename The name of the output TTree.
992 /// \param[in] filename The name of the output TFile.
993 /// \param[in] columnList The list of names of the columns/branches to be written.
994 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
995 /// \return a `RDataFrame` that wraps the snapshotted dataset.
996 ///
997 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
998 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
999 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1000 /// written out and it appears before the array in the columnList.
1001 ///
1002 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
1003 /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
1004 /// present, by default all friend top-level branches that have names that do not collide with
1005 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
1006 /// friend branches with the same names of branches in the main TTree/TChain with names of the form
1007 /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
1008 ///
1009 /// ### Writing to a sub-directory
1010 ///
1011 /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
1012 /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree `t` in the
1013 /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1014 ///
1015 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1016 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
1017 /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
1018 /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1019 /// error out if such a "shuffled" TTree is used in a friendship.
1020 ///
1021 /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
1022 /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
1023 /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
1024 ///
1025 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1026 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1027 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1028 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1029 ///
1030 /// ### Example invocations:
1031 ///
1032 /// ~~~{.cpp}
1033 /// // without specifying template parameters (column types automatically deduced)
1034 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1035 ///
1036 /// // specifying template parameters ("x" is `int`, "y" is `float`)
1037 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
1038 /// ~~~
1039 ///
1040 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1041 /// `RSnapshotOptions`:
1042 /// ~~~{.cpp}
1043 /// RSnapshotOptions opts;
1044 /// opts.fLazy = true;
1045 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1046 /// ~~~
1047 template <typename... ColumnTypes>
// NOTE(review): listing lines 1048-1049 are absent from this dump. They are expected to hold the return type,
// the name Snapshot and the leading parameters (`treename`, `filename` and `columnList` are used in the body)
// -- confirm against the real header.
1050 const RSnapshotOptions &options = RSnapshotOptions())
1051 {
// Thin forwarder: the explicitly specified column types are passed on to SnapshotImpl unchanged.
1052 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
1053 }
1054
1055 ////////////////////////////////////////////////////////////////////////////
1056 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1057 /// \param[in] treename The name of the output TTree.
1058 /// \param[in] filename The name of the output TFile.
1059 /// \param[in] columnList The list of names of the columns/branches to be written.
1060 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1061 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1062 ///
1063 /// This function returns a `RDataFrame` built with the output tree as a source.
1064 /// The types of the columns are automatically inferred and do not need to be specified.
1065 ///
1066 /// See above for a more complete description and example usages.
// NOTE(review): listing line 1067 is absent from this dump. It is expected to hold the return type, the name
// Snapshot and the leading parameters (`treename` and `filename` are used in the body) -- confirm against
// the real header.
1068 const ColumnNames_t &columnList,
1069 const RSnapshotOptions &options = RSnapshotOptions())
1070 {
// Filter the requested names through FilterArraySizeColNames, then validate what remains.
1071 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1072 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
// NOTE(review): listing line 1073 is absent from this dump.
1074
// Keep the full path, then split it: `treename` is overwritten with the tree-name part and the
// directory part goes to `dirname`.
1075 const auto fullTreeName = treename;
1076 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1077 treename = parsedTreePath.fTreeName;
1078 const auto &dirname = parsedTreePath.fDirName;
1079
1080 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1081 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
1082
// NOTE(review): listing line 1083 is absent from this dump.
// The returned dataframe is constructed from the full tree path and the output file name.
1084 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
1085
1086 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1087 validCols, newRDF, snapHelperArgs, validCols.size());
1088
// Unless the snapshot was requested as lazy, dereference the result right away; per the Snapshot
// documentation, a lazy snapshot is the way to book one without triggering the event loop.
1089 if (!options.fLazy)
1090 *resPtr;
1091 return resPtr;
1092 }
1093
1094 // clang-format off
1095 ////////////////////////////////////////////////////////////////////////////
1096 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1097 /// \param[in] treename The name of the output TTree.
1098 /// \param[in] filename The name of the output TFile.
1099 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A leading '^' and a trailing '$' are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1100 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1101 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1102 ///
1103 /// This function returns a `RDataFrame` built with the output tree as a source.
1104 /// The types of the columns are automatically inferred and do not need to be specified.
1105 ///
1106 /// See above for a more complete description and example usages.
// NOTE(review): listing line 1107 is absent from this dump. It is expected to hold the return type, the name
// Snapshot and the leading parameters (`treename` and `filename` are used in the body) -- confirm against
// the real header.
1108 std::string_view columnNameRegexp = "",
1109 const RSnapshotOptions &options = RSnapshotOptions())
1110 {
// Candidate columns come from three sources: the column register, the top-level branches of the
// tree (if any) and the data source (if any).
1111 const auto definedColumns = fColRegister.GetNames();
1112 auto *tree = fLoopManager->GetTree();
1113 const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1114 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1115 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1116 ColumnNames_t dsColumnsWithoutSizeColumns;
// 13 is the length of the "R_rdf_sizeof_" prefix.
1117 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1118 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1119 ColumnNames_t columnNames;
1120 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1121 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1122 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1123 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1124
1125 // De-duplicate column names. Currently the only way this can happen is if a column coming from a tree or
1126 // data-source is Redefine'd.
// Going through std::set also leaves the names in sorted order.
1127 std::set<std::string> uniqueCols(columnNames.begin(), columnNames.end());
1128 columnNames.assign(uniqueCols.begin(), uniqueCols.end());
1129
// Select the columns matching the regular expression and delegate to the column-list overload.
1130 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1131 return Snapshot(treename, filename, selectedColumns, options);
1132 }
1133 // clang-format on
1134
1135 // clang-format off
1136 ////////////////////////////////////////////////////////////////////////////
1137 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1138 /// \param[in] treename The name of the output TTree.
1139 /// \param[in] filename The name of the output TFile.
1140 /// \param[in] columnList The list of names of the columns/branches to be written.
1141 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1142 /// \return a `RDataFrame` that wraps the snapshotted dataset.
1143 ///
1144 /// This function returns a `RDataFrame` built with the output tree as a source.
1145 /// The types of the columns are automatically inferred and do not need to be specified.
1146 ///
1147 /// See above for a more complete description and example usages.
// NOTE(review): listing line 1148 is absent from this dump. It is expected to hold the return type, the name
// Snapshot and the leading parameters (`treename` and `filename` are used in the body) -- confirm against
// the real header.
1149 std::initializer_list<std::string> columnList,
1150 const RSnapshotOptions &options = RSnapshotOptions())
1151 {
// Materialize the braced list as a ColumnNames_t and delegate to the column-list overload.
1152 ColumnNames_t selectedColumns(columnList);
1153 return Snapshot(treename, filename, selectedColumns, options);
1154 }
1155 // clang-format on
1156
1157 ////////////////////////////////////////////////////////////////////////////
1158 /// \brief Save selected columns in memory.
1159 /// \tparam ColumnTypes variadic list of branch/column types.
1160 /// \param[in] columnList columns to be cached in memory.
1161 /// \return a `RDataFrame` that wraps the cached dataset.
1162 ///
1163 /// This action returns a new `RDataFrame` object, completely detached from
1164 /// the originating `RDataFrame`. The new dataframe only contains the cached
1165 /// columns and stores their content in memory for fast, zero-copy subsequent access.
1166 ///
1167 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1168 /// fits in memory and that will be accessed many times.
1169 ///
1170 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1171 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1172 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1173 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1174 ///
1175 /// ### Example usage:
1176 ///
1177 /// **Types and columns specified:**
1178 /// ~~~{.cpp}
1179 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1180 /// ~~~
1181 ///
1182 /// **Types inferred and columns specified (this invocation relies on jitting):**
1183 /// ~~~{.cpp}
1184 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1185 /// ~~~
1186 ///
1187 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1188 /// ~~~{.cpp}
1189 /// auto cache_all_cols_df = df.Cache(myRegexp);
1190 /// ~~~
1191 template <typename... ColumnTypes>
// NOTE(review): listing line 1192 is absent from this dump. It is expected to hold the return type, the name
// Cache and the `columnList` parameter used in the body -- confirm against the real header.
1193 {
// One compile-time index per explicitly specified column type, handed to CacheImpl with the names.
1194 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1195 return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1196 }
1197
1198 ////////////////////////////////////////////////////////////////////////////
1199 /// \brief Save selected columns in memory.
1200 /// \param[in] columnList columns to be cached in memory
1201 /// \return a `RDataFrame` that wraps the cached dataset.
1202 ///
/// The column types are not given by the caller: a call to the templated Cache overload is assembled as a
/// string, with the types obtained from GetValidatedArgTypes, and handed to the loop manager for jitting.
///
1203 /// See the previous overloads for more information.
// NOTE(review): listing line 1204 is absent from this dump. It is expected to hold the signature
// (the body uses a `columnList` parameter) -- confirm against the real header.
1205 {
1206 // Early return: if the list of columns is empty, just return an empty RDF
1207 // If we proceed, the jitted call will not compile!
1208 if (columnList.empty()) {
// The empty dataframe is sized with the entry count of this node.
1209 auto nEntries = *this->Count();
1210 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1211 return emptyRDF;
1212 }
1213
1214 std::stringstream cacheCall;
1215 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1216 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
// NOTE(review): listing line 1217 (the rest of the constructor arguments) is absent from this dump.
1218 // build a string equivalent to
1219 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
// resRDF is the placeholder the generated assignment is meant to overwrite.
1220 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1221 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
// NOTE(review): listing line 1222 is absent from this dump; presumably it streams the address of resRDF
// -- confirm against the real header.
1223 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1224 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1225
1226 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1227
1228 const auto validColumnNames =
1229 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1230 const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), fDataSource,
1231 "Cache", /*vector2rvec=*/false);
// Emit the template argument list: every type is followed by ", ".
1232 for (const auto &colType : colTypes)
1233 cacheCall << colType << ", ";
// Step the write position back over the trailing ", " so the next insertion overwrites it.
1234 if (!columnListWithoutSizeColumns.empty())
1235 cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
// The generated code receives the filtered column list by address, so that vector must stay alive
// until the code has been executed.
1236 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1237 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1238
1239 // book the code to jit with the RLoopManager and trigger the event loop
1240 fLoopManager->ToJitExec(cacheCall.str());
1241 fLoopManager->Jit();
1242
1243 return resRDF;
1244 }
1245
1246 ////////////////////////////////////////////////////////////////////////////
1247 /// \brief Save selected columns in memory.
1248 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1249 /// \return a `RDataFrame` that wraps the cached dataset.
1250 ///
1251 /// The existing columns are matched against the regular expression. If the string provided
1252 /// is empty, all columns are selected. See the previous overloads for more information.
1254 {
1255 const auto definedColumns = fColRegister.GetNames();
1256 auto *tree = fLoopManager->GetTree();
1257 const auto treeBranchNames = tree != nullptr ? RDFInternal::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1258 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1259 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1260 ColumnNames_t dsColumnsWithoutSizeColumns;
1261 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1262 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1263 ColumnNames_t columnNames;
1264 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1265 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1266 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1267 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1268 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1269 return Cache(selectedColumns);
1270 }
1271
1272 ////////////////////////////////////////////////////////////////////////////
1273 /// \brief Save selected columns in memory.
1274 /// \param[in] columnList columns to be cached in memory.
1275 /// \return a `RDataFrame` that wraps the cached dataset.
1276 ///
1277 /// See the previous overloads for more information.
1278 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1279 {
1280 ColumnNames_t selectedColumns(columnList);
1281 return Cache(selectedColumns);
1282 }
1283
1284 // clang-format off
1285 ////////////////////////////////////////////////////////////////////////////
1286 /// \brief Creates a node that filters entries based on range: [begin, end).
1287 /// \param[in] begin Initial entry number considered for this range.
1288 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1289 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1290 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
/// \throws std::runtime_error if `stride` is 0, or if `end` is non-zero and smaller than `begin`.
1291 ///
1292 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1293 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1294 ///
1295 /// ### Example usage:
1296 /// ~~~{.cpp}
1297 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1298 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1299 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1300 /// ~~~
1301 // clang-format on
1302 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1303 {
1304 // check invariants
// NOTE(review): a non-zero `end` equal to `begin` passes this check, although the message asks for `end`
// to be greater than `begin` -- confirm which of the two is intended.
1305 if (stride == 0 || (end != 0 && end < begin))
1306 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1307 CheckIMTDisabled("Range");
1308
// NOTE(review): listing line 1309 is absent from this dump; the alias `Range_t` used below is expected to
// be declared there (presumably RDFDetail::RRange<Proxied>, as in the return type) -- confirm.
1310 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
// NOTE(review): listing line 1311 is absent from this dump; `tdf_r` is expected to be constructed there
// (presumably from rangePtr) -- confirm against the real header.
1312 return tdf_r;
1313 }
1314
1315 // clang-format off
1316 ////////////////////////////////////////////////////////////////////////////
1317 /// \brief Creates a node that filters entries based on range.
1318 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1319 /// \return a node of the computation graph for which the range is defined.
1320 ///
1321 /// See the other Range overload for a detailed description.
1322 // clang-format on
1323 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1324
1325 // clang-format off
1326 ////////////////////////////////////////////////////////////////////////////
1327 /// \brief Execute a user-defined function on each entry (*instant action*).
1328 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1329 /// \param[in] columns Names of the columns/branches in input to the user function.
1330 ///
1331 /// The callable `f` is invoked once per entry. This is an *instant action*:
1332 /// upon invocation, an event loop as well as execution of all scheduled actions
1333 /// is triggered.
1334 /// Users are responsible for the thread-safety of this callable when executing
1335 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1336 ///
1337 /// ### Example usage:
1338 /// ~~~{.cpp}
1339 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1340 /// ~~~
1341 // clang-format on
1342 template <typename F>
1343 void Foreach(F f, const ColumnNames_t &columns = {})
1344 {
1345 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1346 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1347 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1348 }
1349
1350 // clang-format off
1351 ////////////////////////////////////////////////////////////////////////////
1352 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1353 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1354 /// \param[in] columns Names of the columns/branches in input to the user function.
1355 ///
1356 /// Same as `Foreach`, but the user-defined function takes an extra
1357 /// `unsigned int` as its first parameter, the *processing slot index*.
1358 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1359 /// for each thread of execution.
1360 /// This is meant as a helper in writing thread-safe `Foreach`
1361 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1362 /// The user-defined processing callable is able to follow different
1363 /// *streams of processing* indexed by the first parameter.
1364 /// `ForeachSlot` works just as well with single-thread execution: in that
1365 /// case `slot` will always be `0`.
1366 ///
1367 /// ### Example usage:
1368 /// ~~~{.cpp}
1369 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1370 /// ~~~
1371 // clang-format on
// Instant action: validates the requested columns, builds an action around the user callable and immediately
// runs the event loop.
// NOTE(review): `Action_t` is used below but is not declared in the visible lines (listing number 1382 is
// skipped) -- the alias appears to be missing from this listing; restore it from the original header.
1372 template <typename F>
1373 void ForeachSlot(F f, const ColumnNames_t &columns = {})
1374 {
// Column types are the callable's argument types with the first one removed (per the docs above, the first
// parameter is the processing slot index).
1375 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
1376 constexpr auto nColumns = ColTypes_t::list_size;
1377
1378 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1379 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1380
1381 using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1383
// f is moved into the helper, so it must not be used after this line.
1384 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1385
// `action` is a local unique_ptr, so it is destroyed when this function returns, after Run() has completed.
1386 fLoopManager->Run();
1387 }
1388
1389 // clang-format off
1390 ////////////////////////////////////////////////////////////////////////////
1391 /// \brief Execute a user-defined reduce operation on the values of a column.
1392 /// \tparam F The type of the reduce callable. Automatically deduced.
1393 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1394 /// \param[in] f A callable with signature `T(T,T)`
1395 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1396 /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1397 ///
1398 /// A reduction takes two values of a column and merges them into one (e.g.
1399 /// by summing them, taking the maximum, etc). This action performs the
1400 /// specified reduction operation on all processed column values, returning
1401 /// a single value of the same type. The callable f must satisfy the general
1402 /// requirements of a *processing function* besides having signature `T(T,T)`
1403 /// where `T` is the type of column columnName.
1404 ///
1405 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1406 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1407 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1408 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1409 /// overload.
1410 ///
1411 /// ### Example usage:
1412 /// ~~~{.cpp}
1413 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1414 /// ~~~
1415 ///
1416 /// This action is *lazy*: upon invocation of this method the calculation is
1417 /// booked but not executed. Also see RResultPtr.
1418 // clang-format on
// Reduce overload without an explicit identity: forwards to the three-argument Reduce with a
// default-constructed T as the initial value.
// NOTE(review): the function signature (listing number 1420) and the condition of the static_assert (1423) are
// not present in the visible lines -- they appear to be missing from this listing; restore them from the
// original header.
1419 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1421 {
1422 static_assert(
1424 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
// T() is the initial value of the reduction; f is moved into the delegated call.
1425 return Reduce(std::move(f), columnName, T());
1426 }
1427
1428 ////////////////////////////////////////////////////////////////////////////
1429 /// \brief Execute a user-defined reduce operation on the values of a column.
1430 /// \tparam F The type of the reduce callable. Automatically deduced.
1431 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1432 /// \param[in] f A callable with signature `T(T,T)`
1433 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1434 /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1435 /// \return the reduced quantity wrapped in a RResultPtr.
1436 ///
1437 /// ### Example usage:
1438 /// ~~~{.cpp}
1439 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1440 /// ~~~
1441 /// See the description of the first Reduce overload for more information.
1442 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1443 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1444 {
1445 return Aggregate(f, f, columnName, redIdentity);
1446 }
1447
1448 ////////////////////////////////////////////////////////////////////////////
1449 /// \brief Return the number of entries processed (*lazy action*).
1450 /// \return the number of entries wrapped in a RResultPtr.
1451 ///
1452 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1453 /// This action is *lazy*: upon invocation of this method the calculation is
1454 /// booked but not executed. Also see RResultPtr.
1455 ///
1456 /// ### Example usage:
1457 /// ~~~{.cpp}
1458 /// auto nEntriesAfterCuts = myFilteredDf.Count();
1459 /// ~~~
1460 ///
// Body of the lazy entry-counting action documented above.
// NOTE(review): the function signature (listing number 1461), the declaration of `Action_t` (1466) and the end
// of the make_unique call (1468) are not present in the visible lines -- they appear to be missing from this
// listing; restore them from the original header.
1462 {
1463 const auto nSlots = fLoopManager->GetNSlots();
// Shared counter, initialised to 0; the same pointer is given to the helper and to the returned result.
1464 auto cSPtr = std::make_shared<ULong64_t>(0);
1465 using Helper_t = RDFInternal::CountHelper;
// Counting needs no input columns, hence the empty column list.
1467 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1469 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1470 }
1471
1472 ////////////////////////////////////////////////////////////////////////////
1473 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1474 /// \tparam T The type of the column.
1475 /// \tparam COLL The type of collection used to store the values.
1476 /// \param[in] column The name of the column to collect the values of.
1477 /// \return the content of the selected column wrapped in a RResultPtr.
1478 ///
1479 /// The collection type to be specified for C-style array columns is `RVec<T>`:
1480 /// in this case the returned collection is a `std::vector<RVec<T>>`.
1481 /// ### Example usage:
1482 /// ~~~{.cpp}
1483 /// // In this case intCol is a std::vector<int>
1484 /// auto intCol = rdf.Take<int>("integerColumn");
1485 /// // Same content as above but in this case taken as a RVec<int>
1486 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1487 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1488 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1489 /// ~~~
1490 /// This action is *lazy*: upon invocation of this method the calculation is
1491 /// booked but not executed. Also see RResultPtr.
// Lazy action collecting the values of one column into a COLL.
// NOTE(review): the function signature (listing number 1493) and the declaration of `Action_t` (1501) are not
// present in the visible lines -- they appear to be missing from this listing; restore them from the original
// header.
1492 template <typename T, typename COLL = std::vector<T>>
1494 {
// An empty `column` yields an empty list, which is then passed to GetValidatedColumnNames together with the
// expected column count (1).
1495 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1496
1497 const auto validColumnNames = GetValidatedColumnNames(1, columns);
1498 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1499
1500 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
// Result container, shared between the helper and the returned result.
1502 auto valuesPtr = std::make_shared<COLL>();
1503 const auto nSlots = fLoopManager->GetNSlots();
1504
1505 auto action =
1506 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1507 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1508 }
1509
1510 ////////////////////////////////////////////////////////////////////////////
1511 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1512 /// \tparam V The type of the column used to fill the histogram.
1513 /// \param[in] model The returned histogram will be constructed using this as a model.
1514 /// \param[in] vName The name of the column that will fill the histogram.
1515 /// \return the monodimensional histogram wrapped in a RResultPtr.
1516 ///
1517 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1518 /// is filled with each one of the elements of the container. In case multiple columns of container type
1519 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1520 /// possibly different lengths between events).
1521 /// This action is *lazy*: upon invocation of this method the calculation is
1522 /// booked but not executed. Also see RResultPtr.
1523 ///
1524 /// ### Example usage:
1525 /// ~~~{.cpp}
1526 /// // Deduce column type (this invocation needs jitting internally)
1527 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1528 /// // Explicit column type
1529 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1530 /// ~~~
1531 ///
1532 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1533 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1534 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1535 template <typename V = RDFDetail::RInferredType>
1536 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1537 {
1538 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1539
1540 const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1541
1542 std::shared_ptr<::TH1D> h(nullptr);
1543 {
1544 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1545 h = model.GetHistogram();
1546 h->SetDirectory(nullptr);
1547 }
1548
1549 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1550 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1551 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h);
1552 }
1553
1554 ////////////////////////////////////////////////////////////////////////////
1555 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1556 /// \tparam V The type of the column used to fill the histogram.
1557 /// \param[in] vName The name of the column that will fill the histogram.
1558 /// \return the monodimensional histogram wrapped in a RResultPtr.
1559 ///
1560 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1561 /// The "name" and "title" strings are built starting from the input column name.
1562 /// See the description of the first Histo1D() overload for more details.
1563 ///
1564 /// ### Example usage:
1565 /// ~~~{.cpp}
1566 /// // Deduce column type (this invocation needs jitting internally)
1567 /// auto myHist1 = myDf.Histo1D("myColumn");
1568 /// // Explicit column type
1569 /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1570 /// ~~~
// Histo1D overload that derives the histogram name and title from the column name and delegates to the
// model-based overload with 128 bins and identical lower/upper bounds (0., 0.).
// NOTE(review): the function signature (listing number 1572) is not present in the visible lines -- it appears
// to be missing from this listing; restore it from the original header.
1571 template <typename V = RDFDetail::RInferredType>
1573 {
1574 const auto h_name = std::string(vName);
// Presumably follows ROOT's "title;x-axis title;y-axis title" convention -- confirm against TH1 documentation.
1575 const auto h_title = h_name + ";" + h_name + ";count";
1576 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1577 }
1578
1579 ////////////////////////////////////////////////////////////////////////////
1580 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1581 /// \tparam V The type of the column used to fill the histogram.
1582 /// \tparam W The type of the column used as weights.
1583 /// \param[in] model The returned histogram will be constructed using this as a model.
1584 /// \param[in] vName The name of the column that will fill the histogram.
1585 /// \param[in] wName The name of the column that will provide the weights.
1586 /// \return the monodimensional histogram wrapped in a RResultPtr.
1587 ///
1588 /// See the description of the first Histo1D() overload for more details.
1589 ///
1590 /// ### Example usage:
1591 /// ~~~{.cpp}
1592 /// // Deduce column type (this invocation needs jitting internally)
1593 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1594 /// // Explicit column type
1595 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1596 /// ~~~
// Weighted Histo1D: books a Histo1D action over a value column and a weight column.
// NOTE(review): the function signature (listing number 1598) is not present in the visible lines -- it appears
// to be missing from this listing; restore it from the original header.
// NOTE(review): unlike the unweighted Histo1D(model, vName) overload in this file, this body neither calls
// h->SetDirectory(nullptr) nor SetCanExtendAllAxes, and passes the column names on without
// GetValidatedColumnNames -- confirm this asymmetry is intentional.
1597 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1599 {
1600 const std::vector<std::string_view> columnViews = {vName, wName};
// All-or-nothing: when AtLeastOneEmptyString reports true, an empty column list is passed on instead of a
// partial one.
1601 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1602 ? ColumnNames_t()
1603 : ColumnNames_t(columnViews.begin(), columnViews.end());
1604 std::shared_ptr<::TH1D> h(nullptr);
1605 {
// The guard is scoped to the creation of the histogram only.
1606 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1607 h = model.GetHistogram();
1608 }
1609 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h);
1610 }
1611
1612 ////////////////////////////////////////////////////////////////////////////
1613 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1614 /// \tparam V The type of the column used to fill the histogram.
1615 /// \tparam W The type of the column used as weights.
1616 /// \param[in] vName The name of the column that will fill the histogram.
1617 /// \param[in] wName The name of the column that will provide the weights.
1618 /// \return the monodimensional histogram wrapped in a RResultPtr.
1619 ///
1620 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1621 /// The "name" and "title" strings are built starting from the input column names.
1622 /// See the description of the first Histo1D() overload for more details.
1623 ///
1624 /// ### Example usage:
1625 /// ~~~{.cpp}
1626 /// // Deduce column types (this invocation needs jitting internally)
1627 /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1628 /// // Explicit column types
1629 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1630 /// ~~~
// Weighted Histo1D overload that derives the histogram name and title from the value and weight column names and
// delegates to the model-based weighted overload with 128 bins and identical lower/upper bounds (0., 0.).
// NOTE(review): the function signature (listing number 1632) is not present in the visible lines -- it appears
// to be missing from this listing; restore it from the original header.
1631 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1633 {
1634 // We build name and title based on the value and weight column names
1635 std::string str_vName{vName};
1636 std::string str_wName{wName};
1637 const auto h_name = str_vName + "_weighted_" + str_wName;
// Presumably follows ROOT's "title;x-axis title;y-axis title" convention -- confirm against TH1 documentation.
1638 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1639 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1640 }
1641
1642 ////////////////////////////////////////////////////////////////////////////
1643 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1644 /// \tparam V The type of the column used to fill the histogram.
1645 /// \tparam W The type of the column used as weights.
1646 /// \param[in] model The returned histogram will be constructed using this as a model.
1647 /// \return the monodimensional histogram wrapped in a RResultPtr.
1648 ///
1649 /// This overload will use the first two default columns as column names.
1650 /// See the description of the first Histo1D() overload for more details.
1651 template <typename V, typename W>
1652 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1653 {
1654 return Histo1D<V, W>(model, "", "");
1655 }
1656
1657 ////////////////////////////////////////////////////////////////////////////
1658 /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1659 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1660 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1661 /// \param[in] model The returned histogram will be constructed using this as a model.
1662 /// \param[in] v1Name The name of the column that will fill the x axis.
1663 /// \param[in] v2Name The name of the column that will fill the y axis.
1664 /// \return the bidimensional histogram wrapped in a RResultPtr.
1665 ///
1666 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1667 /// is filled with each one of the elements of the container. In case multiple columns of container type
1668 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1669 /// possibly different lengths between events).
1670 /// This action is *lazy*: upon invocation of this method the calculation is
1671 /// booked but not executed. Also see RResultPtr.
1672 ///
1673 /// ### Example usage:
1674 /// ~~~{.cpp}
1675 /// // Deduce column types (this invocation needs jitting internally)
1676 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1677 /// // Explicit column types
1678 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1679 /// ~~~
1680 ///
1681 ///
1682 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1683 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1684 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
// Unweighted Histo2D: books a Histo2D action over an x and a y column.
// NOTE(review): the function signature (listing number 1686) is not present in the visible lines -- it appears
// to be missing from this listing; restore it from the original header.
1685 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1687 {
1688 std::shared_ptr<::TH2D> h(nullptr);
1689 {
// The guard is scoped to the creation of the histogram only.
1690 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1691 h = model.GetHistogram();
1692 }
// Models for which HasAxisLimits is false are rejected up front.
1693 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1694 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1695 }
1696 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
// All-or-nothing: when AtLeastOneEmptyString reports true, an empty column list is passed on instead of a
// partial one.
1697 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1698 ? ColumnNames_t()
1699 : ColumnNames_t(columnViews.begin(), columnViews.end());
1700 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h);
1701 }
1702
1703 ////////////////////////////////////////////////////////////////////////////
1704 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1705 /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1706 /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1707 /// \tparam W The type of the column used for the weights of the histogram.
1708 /// \param[in] model The returned histogram will be constructed using this as a model.
1709 /// \param[in] v1Name The name of the column that will fill the x axis.
1710 /// \param[in] v2Name The name of the column that will fill the y axis.
1711 /// \param[in] wName The name of the column that will provide the weights.
1712 /// \return the bidimensional histogram wrapped in a RResultPtr.
1713 ///
1714 /// This action is *lazy*: upon invocation of this method the calculation is
1715 /// booked but not executed. Also see RResultPtr.
1716 ///
1717 /// ### Example usage:
1718 /// ~~~{.cpp}
1719 /// // Deduce column types (this invocation needs jitting internally)
1720 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1721 /// // Explicit column types
1722 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1723 /// ~~~
1724 ///
1725 /// See the documentation of the first Histo2D() overload for more details.
// Weighted Histo2D: books a Histo2D action over an x, a y and a weight column.
// NOTE(review): the function signature (listing numbers 1728-1729) is not present in the visible lines -- it
// appears to be missing from this listing; restore it from the original header.
1726 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1727 typename W = RDFDetail::RInferredType>
1730 {
1731 std::shared_ptr<::TH2D> h(nullptr);
1732 {
// The guard is scoped to the creation of the histogram only.
1733 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1734 h = model.GetHistogram();
1735 }
// Models for which HasAxisLimits is false are rejected up front.
1736 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1737 throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1738 }
1739 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
// All-or-nothing: when AtLeastOneEmptyString reports true, an empty column list is passed on instead of a
// partial one.
1740 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1741 ? ColumnNames_t()
1742 : ColumnNames_t(columnViews.begin(), columnViews.end());
1743 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h);
1744 }
1745
// Histo2D overload taking only a model: forwards to the (model, v1Name, v2Name, wName) overload with all three
// column names empty.
// NOTE(review): the function signature (listing number 1747) is not present in the visible lines -- it appears
// to be missing from this listing; restore it from the original header.
1746 template <typename V1, typename V2, typename W>
1748 {
1749 return Histo2D<V1, V2, W>(model, "", "", "");
1750 }
1751
1752 ////////////////////////////////////////////////////////////////////////////
1753 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1754 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1755 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1756 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1757 /// \param[in] model The returned histogram will be constructed using this as a model.
1758 /// \param[in] v1Name The name of the column that will fill the x axis.
1759 /// \param[in] v2Name The name of the column that will fill the y axis.
1760 /// \param[in] v3Name The name of the column that will fill the z axis.
1761 /// \return the tridimensional histogram wrapped in a RResultPtr.
1762 ///
1763 /// This action is *lazy*: upon invocation of this method the calculation is
1764 /// booked but not executed. Also see RResultPtr.
1765 ///
1766 /// ### Example usage:
1767 /// ~~~{.cpp}
1768 /// // Deduce column types (this invocation needs jitting internally)
1769 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1770 /// "myValueX", "myValueY", "myValueZ");
1771 /// // Explicit column types
1772 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1773 /// "myValueX", "myValueY", "myValueZ");
1774 /// ~~~
1775 ///
1776 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1777 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1778 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
// Unweighted Histo3D: books a Histo3D action over an x, a y and a z column.
// NOTE(review): the first line of the function signature (listing number 1781) is not present in the visible
// lines -- only its continuation survives; restore it from the original header.
1779 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1780 typename V3 = RDFDetail::RInferredType>
1782 std::string_view v3Name = "")
1783 {
1784 std::shared_ptr<::TH3D> h(nullptr);
1785 {
// The guard is scoped to the creation of the histogram only.
1786 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1787 h = model.GetHistogram();
1788 }
// Models for which HasAxisLimits is false are rejected up front.
1789 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1790 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1791 }
1792 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
// All-or-nothing: when AtLeastOneEmptyString reports true, an empty column list is passed on instead of a
// partial one.
1793 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1794 ? ColumnNames_t()
1795 : ColumnNames_t(columnViews.begin(), columnViews.end());
1796 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h);
1797 }
1798
1799 ////////////////////////////////////////////////////////////////////////////
1800 /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1801 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1802 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1803 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1804 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1805 /// \param[in] model The returned histogram will be constructed using this as a model.
1806 /// \param[in] v1Name The name of the column that will fill the x axis.
1807 /// \param[in] v2Name The name of the column that will fill the y axis.
1808 /// \param[in] v3Name The name of the column that will fill the z axis.
1809 /// \param[in] wName The name of the column that will provide the weights.
1810 /// \return the tridimensional histogram wrapped in a RResultPtr.
1811 ///
1812 /// This action is *lazy*: upon invocation of this method the calculation is
1813 /// booked but not executed. Also see RResultPtr.
1814 ///
1815 /// ### Example usage:
1816 /// ~~~{.cpp}
1817 /// // Deduce column types (this invocation needs jitting internally)
1818 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1819 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1820 /// // Explicit column types
1821 /// using d_t = double;
1822 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1823 /// "myValueX", "myValueY", "myValueZ", "myWeight");
1824 /// ~~~
1825 ///
1826 ///
1827 /// See the documentation of the first Histo3D() overload for more details.
// Weighted Histo3D: books a Histo3D action over an x, a y, a z and a weight column.
// NOTE(review): the first line of the function signature (listing number 1830) is not present in the visible
// lines -- only its continuation survives; restore it from the original header.
1828 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1829 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1831 std::string_view v3Name, std::string_view wName)
1832 {
1833 std::shared_ptr<::TH3D> h(nullptr);
1834 {
// The guard is scoped to the creation of the histogram only.
1835 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1836 h = model.GetHistogram();
1837 }
// Models for which HasAxisLimits is false are rejected up front.
1838 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1839 throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1840 }
1841 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
// All-or-nothing: when AtLeastOneEmptyString reports true, an empty column list is passed on instead of a
// partial one.
1842 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1843 ? ColumnNames_t()
1844 : ColumnNames_t(columnViews.begin(), columnViews.end());
1845 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h);
1846 }
1847
// Histo3D overload taking only a model: forwards to the (model, v1Name, v2Name, v3Name, wName) overload with all
// four column names empty.
// NOTE(review): the function signature (listing number 1849) is not present in the visible lines -- it appears
// to be missing from this listing; restore it from the original header.
1848 template <typename V1, typename V2, typename V3, typename W>
1850 {
1851 return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1852 }
1853
1854 ////////////////////////////////////////////////////////////////////////////
1855 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1856 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
1857 /// present.
1858 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
1859 /// object.
1860 /// \param[in] model The returned histogram will be constructed using this as a model.
1861 /// \param[in] columnList
1862 /// A list containing the names of the columns that will be passed when calling `Fill`.
1863 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1864 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1865 ///
1866 /// This action is *lazy*: upon invocation of this method the calculation is
1867 /// booked but not executed. See RResultPtr documentation.
1868 ///
1869 /// ### Example usage:
1870 /// ~~~{.cpp}
1871 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
1872 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1873 /// {"col0", "col1", "col2", "col3"});
1874 /// ~~~
1875 ///
1876 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
1877 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1878 {
1879 std::shared_ptr<::THnD> h(nullptr);
1880 {
1881 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1882 h = model.GetHistogram();
1883
1884 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1885 h->Sumw2();
1886 } else if (int(columnList.size()) != h->GetNdimensions()) {
1887 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1888 }
1889 }
1890 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h);
1891 }
1892
1893 ////////////////////////////////////////////////////////////////////////////
1894 /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1895 /// \param[in] model The returned histogram will be constructed using this as a model.
1896 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1897 /// (N columns for unweighted filling, or N+1 columns for weighted filling)
1898 /// \return the N-dimensional histogram wrapped in a RResultPtr.
1899 ///
1900 /// This action is *lazy*: upon invocation of this method the calculation is
1901 /// booked but not executed. Also see RResultPtr.
1902 ///
1903 /// ### Example usage:
1904 /// ~~~{.cpp}
1905 /// auto myFilledObj = myDf.HistoND({"name","title", 4,
1906 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1907 /// {"col0", "col1", "col2", "col3"});
1908 /// ~~~
1909 ///
1910 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1911 {
1912 std::shared_ptr<::THnD> h(nullptr);
1913 {
1914 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1915 h = model.GetHistogram();
1916
1917 if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1918 h->Sumw2();
1919 } else if (int(columnList.size()) != h->GetNdimensions()) {
1920 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1921 }
1922 }
1923 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h,
1924 columnList.size());
1925 }
1926
1927 ////////////////////////////////////////////////////////////////////////////
1928 /// \brief Fill and return a TGraph object (*lazy action*).
1929 /// \tparam X The type of the column used to fill the x axis.
1930 /// \tparam Y The type of the column used to fill the y axis.
1931 /// \param[in] x The name of the column that will fill the x axis.
1932 /// \param[in] y The name of the column that will fill the y axis.
1933 /// \return the TGraph wrapped in a RResultPtr.
1934 ///
1935 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
1936 /// is filled with each one of the elements of the container.
1937 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1938 /// If the graph has to be drawn, it is suggested to sort it on the x axis before drawing.
1939 /// A name and a title are given to the TGraph based on the input column names.
1940 ///
1941 /// This action is *lazy*: upon invocation of this method the calculation is
1942 /// booked but not executed. Also see RResultPtr.
1943 ///
1944 /// ### Example usage:
1945 /// ~~~{.cpp}
1946 /// // Deduce column types (this invocation needs jitting internally)
1947 /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1948 /// // Explicit column types
1949 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1950 /// ~~~
1951 ///
1952 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
1953 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1954 /// if result graphs go out of scope before the end of the program, ROOT might display a blank canvas).
1955 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
// NOTE(review): listing line 1956 (the declaration that introduces `x` and `y`) is absent from this listing —
// TODO restore it from the original header.
1957 {
1958 auto graph = std::make_shared<::TGraph>();
1959 const std::vector<std::string_view> columnViews = {x, y};
// When AtLeastOneEmptyString returns true for {x, y}, an empty column list is used instead of the given names.
1960 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1961 ? ColumnNames_t()
1962 : ColumnNames_t(columnViews.begin(), columnViews.end());
1963
// Two column names are requested; the returned names are used for naming below and for the action itself.
1964 const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1965
1966 // We build a default name and title based on the input columns
1967 const auto g_name = validatedColumns[0] + "_vs_" + validatedColumns[1];
1968 const auto g_title = validatedColumns[0] + " vs " + validatedColumns[1];
1969 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
// Axis titles are the x and y column names.
1970 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
1971 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
1972
// The same shared pointer to the graph is passed twice to CreateAction.
1973 return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph);
1974 }
1975
1976 ////////////////////////////////////////////////////////////////////////////
1977 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
1978 /// \param[in] x The name of the column that will fill the x axis.
1979 /// \param[in] y The name of the column that will fill the y axis.
1980 /// \param[in] exl The name of the column of X low errors
1981 /// \param[in] exh The name of the column of X high errors
1982 /// \param[in] eyl The name of the column of Y low errors
1983 /// \param[in] eyh The name of the column of Y high errors
1984 /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
1985 ///
1986 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1987 /// is filled with each one of the elements of the container.
1988 /// If Multithreading is enabled, the order in which points are inserted is undefined.
1989 ///
1990 /// This action is *lazy*: upon invocation of this method the calculation is
1991 /// booked but not executed. Also see RResultPtr.
1992 ///
1993 /// ### Example usage:
1994 /// ~~~{.cpp}
1995 /// // Deduce column types (this invocation needs jitting internally)
1996 /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
1997 /// // Explicit column types
1998 /// using f = float;
1999 /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2000 /// ~~~
2001 ///
2002 /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
2003 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2004 /// if result graphs go out of scope before the end of the program, ROOT might display a blank canvas).
2005 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
2006 typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
2007 typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
// NOTE(review): listing lines 2008-2009 (start of the declaration, introducing `x`, `y` and `exl`) are absent from
// this listing — TODO restore them from the original header.
2010 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
2011 {
2012 auto graph = std::make_shared<::TGraphAsymmErrors>();
// Order matters: x, y, then the low/high errors on x, then the low/high errors on y.
2013 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
// When AtLeastOneEmptyString returns true for the six names, an empty column list is used instead.
2014 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2015 ? ColumnNames_t()
2016 : ColumnNames_t(columnViews.begin(), columnViews.end());
2017
// Six column names are requested; only the first two (x and y) are used for naming below.
2018 const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
2019
2020 // We build a default name and title based on the input columns
2021 const auto g_name = validatedColumns[0] + "_vs_" + validatedColumns[1];
2022 const auto g_title = validatedColumns[0] + " vs " + validatedColumns[1];
2023 graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2024 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2025 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2026
// The same shared pointer to the graph is passed twice to CreateAction.
2027 return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
2028 graph);
2029 }
2030
2031 ////////////////////////////////////////////////////////////////////////////
2032 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2033 /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present.
2034 /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present.
2035 /// \param[in] model The model to be considered to build the new return value.
2036 /// \param[in] v1Name The name of the column that will fill the x axis.
2037 /// \param[in] v2Name The name of the column that will fill the y axis.
2038 /// \return the monodimensional profile wrapped in a RResultPtr.
2039 ///
2040 /// This action is *lazy*: upon invocation of this method the calculation is
2041 /// booked but not executed. Also see RResultPtr.
2042 ///
2043 /// ### Example usage:
2044 /// ~~~{.cpp}
2045 /// // Deduce column types (this invocation needs jitting internally)
2046 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2047 /// // Explicit column types
2048 /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2049 /// ~~~
2050 ///
2051 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2052 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2053 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2054 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
// NOTE(review): listing line 2055 (presumably the return type) is absent from this listing — TODO restore.
2056 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2057 {
2058 std::shared_ptr<::TProfile> h(nullptr);
2059 {
// `iel` is alive only while the model creates the profile.
2060 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2061 h = model.GetProfile();
2062 }
2063
// Profiles for which HasAxisLimits returns false are rejected up front.
2064 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2065 throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2066 }
2067 const std::vector<std::string_view> columnViews = {v1Name, v2Name};
// When AtLeastOneEmptyString returns true for the two names, an empty column list is passed on.
2068 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2069 ? ColumnNames_t()
2070 : ColumnNames_t(columnViews.begin(), columnViews.end());
// The same shared pointer to the profile is passed twice to CreateAction.
2071 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h);
2072 }
2073
2074 ////////////////////////////////////////////////////////////////////////////
2075 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2076 /// \tparam V1 The type of the column used to fill the x axis of the profile. Inferred if not present.
2077 /// \tparam V2 The type of the column used to fill the y axis of the profile. Inferred if not present.
2078 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2079 /// \param[in] model The model to be considered to build the new return value.
2080 /// \param[in] v1Name The name of the column that will fill the x axis.
2081 /// \param[in] v2Name The name of the column that will fill the y axis.
2082 /// \param[in] wName The name of the column that will provide the weights.
2083 /// \return the monodimensional profile wrapped in a RResultPtr.
2084 ///
2085 /// This action is *lazy*: upon invocation of this method the calculation is
2086 /// booked but not executed. Also see RResultPtr.
2087 ///
2088 /// ### Example usage:
2089 /// ~~~{.cpp}
2090 /// // Deduce column types (this invocation needs jitting internally)
2091 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2092 /// // Explicit column types
2093 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2094 /// "xValues", "yValues", "weight");
2095 /// ~~~
2096 ///
2097 /// See the first Profile1D() overload for more details.
2098 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2099 typename W = RDFDetail::RInferredType>
// NOTE(review): listing lines 2100-2101 (the declaration introducing `model`, `v1Name`, `v2Name` and `wName`) are
// absent from this listing — TODO restore them from the original header.
2102 {
2103 std::shared_ptr<::TProfile> h(nullptr);
2104 {
// `iel` is alive only while the model creates the profile.
2105 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2106 h = model.GetProfile();
2107 }
2108
// Profiles for which HasAxisLimits returns false are rejected up front.
2109 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2110 throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2111 }
// Column order: x values, y values, weights.
2112 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
// When AtLeastOneEmptyString returns true for the three names, an empty column list is passed on.
2113 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2114 ? ColumnNames_t()
2115 : ColumnNames_t(columnViews.begin(), columnViews.end());
2116 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h);
2117 }
2118
2119 ////////////////////////////////////////////////////////////////////////////
2120 /// \brief Fill and return a one-dimensional profile (*lazy action*).
2121 /// See the first Profile1D() overload for more details.
2122 template <typename V1, typename V2, typename W>
// NOTE(review): listing line 2123 (the declaration introducing `model`) is absent from this listing — TODO restore.
2124 {
// Forwards to the four-argument overload with three empty column names.
2125 return Profile1D<V1, V2, W>(model, "", "", "");
2126 }
2127
2128 ////////////////////////////////////////////////////////////////////////////
2129 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2130 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2131 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2132 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2133 /// \param[in] model The returned profile will be constructed using this as a model.
2134 /// \param[in] v1Name The name of the column that will fill the x axis.
2135 /// \param[in] v2Name The name of the column that will fill the y axis.
2136 /// \param[in] v3Name The name of the column that will fill the z axis.
2137 /// \return the bidimensional profile wrapped in a RResultPtr.
2138 ///
2139 /// This action is *lazy*: upon invocation of this method the calculation is
2140 /// booked but not executed. Also see RResultPtr.
2141 ///
2142 /// ### Example usage:
2143 /// ~~~{.cpp}
2144 /// // Deduce column types (this invocation needs jitting internally)
2145 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2146 /// "xValues", "yValues", "zValues");
2147 /// // Explicit column types
2148 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2149 /// "xValues", "yValues", "zValues");
2150 /// ~~~
2151 ///
2152 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2153 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2154 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2155 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2156 typename V3 = RDFDetail::RInferredType>
// NOTE(review): listing line 2157 (start of the declaration, introducing `model` and `v1Name`) is absent from this
// listing — TODO restore it from the original header.
2158 std::string_view v2Name = "", std::string_view v3Name = "")
2159 {
2160 std::shared_ptr<::TProfile2D> h(nullptr);
2161 {
// `iel` is alive only while the model creates the profile.
2162 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2163 h = model.GetProfile();
2164 }
2165
// Profiles for which HasAxisLimits returns false are rejected up front.
2166 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2167 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2168 }
// Column order: x, y, z.
2169 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
// When AtLeastOneEmptyString returns true for the three names, an empty column list is passed on.
2170 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2171 ? ColumnNames_t()
2172 : ColumnNames_t(columnViews.begin(), columnViews.end());
2173 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h);
2174 }
2175
2176 ////////////////////////////////////////////////////////////////////////////
2177 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2178 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2179 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2180 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2181 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2182 /// \param[in] model The returned histogram will be constructed using this as a model.
2183 /// \param[in] v1Name The name of the column that will fill the x axis.
2184 /// \param[in] v2Name The name of the column that will fill the y axis.
2185 /// \param[in] v3Name The name of the column that will fill the z axis.
2186 /// \param[in] wName The name of the column that will provide the weights.
2187 /// \return the bidimensional profile wrapped in a RResultPtr.
2188 ///
2189 /// This action is *lazy*: upon invocation of this method the calculation is
2190 /// booked but not executed. Also see RResultPtr.
2191 ///
2192 /// ### Example usage:
2193 /// ~~~{.cpp}
2194 /// // Deduce column types (this invocation needs jitting internally)
2195 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2196 /// "xValues", "yValues", "zValues", "weight");
2197 /// // Explicit column types
2198 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2199 /// "xValues", "yValues", "zValues", "weight");
2200 /// ~~~
2201 ///
2202 /// See the first Profile2D() overload for more details.
2203 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2204 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 2205 (start of the declaration, introducing `model`, `v1Name` and `v2Name`) is absent
// from this listing — TODO restore it from the original header.
2206 std::string_view v3Name, std::string_view wName)
2207 {
2208 std::shared_ptr<::TProfile2D> h(nullptr);
2209 {
// `iel` is alive only while the model creates the profile.
2210 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2211 h = model.GetProfile();
2212 }
2213
// Profiles for which HasAxisLimits returns false are rejected up front.
2214 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2215 throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2216 }
// Column order: x, y, z, weights.
2217 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
// When AtLeastOneEmptyString returns true for the four names, an empty column list is passed on.
2218 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2219 ? ColumnNames_t()
2220 : ColumnNames_t(columnViews.begin(), columnViews.end());
2221 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h);
2222 }
2223
2224 /// \brief Fill and return a two-dimensional profile (*lazy action*).
2225 /// See the first Profile2D() overload for more details.
2226 template <typename V1, typename V2, typename V3, typename W>
// NOTE(review): listing line 2227 (the declaration introducing `model`) is absent from this listing — TODO restore.
2228 {
// Forwards to the five-argument overload with four empty column names.
2229 return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2230 }
2231
2232 ////////////////////////////////////////////////////////////////////////////
2233 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2234 ///
2235 /// Type T must provide at least:
2236 /// - a copy-constructor
2237 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2238 /// (these types can also be passed as template parameters to this method)
2239 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2240 /// objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that
2241 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2242 /// the TCollection*).
2243 ///
2244 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2245 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2246 /// \tparam T The type of the object to fill. Automatically deduced.
2247 /// \param[in] model The model to be considered to build the new return value.
2248 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2249 /// \return the filled object wrapped in a RResultPtr.
2250 ///
2251 /// The user gives up ownership of the model object.
2252 /// The list of column names to be used for filling must always be specified.
2253 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2254 /// Also see RResultPtr.
2255 ///
2256 /// ### Example usage:
2257 /// ~~~{.cpp}
2258 /// MyClass obj;
2259 /// // Deduce column types (this invocation needs jitting internally, and in this case
2260 /// // MyClass needs to be known to the interpreter)
2261 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2262 /// // explicit column types
2263 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2264 /// ~~~
2265 ///
2266 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2267 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2268 {
// `model` is a forwarding reference: the result object is a std::decay_t<T> constructed from the forwarded model.
2269 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
// Objects for which HasAxisLimits returns false are rejected.
// NOTE(review): HistoUtils is instantiated with T, not std::decay_t<T>; confirm that it handles reference types
// as intended when `model` is an lvalue.
2270 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2271 throw std::runtime_error("The absence of axes limits is not supported yet.");
2272 }
// The same shared pointer is passed twice to CreateAction, followed by the number of columns.
2273 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h,
2274 columnList.size());
2275 }
2276
2277 ////////////////////////////////////////////////////////////////////////////
2278 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2279 ///
2280 /// \tparam V The type of the value column
2281 /// \param[in] value The name of the column with the values to fill the statistics with.
2282 /// \return the filled TStatistic object wrapped in a RResultPtr.
2283 ///
2284 /// ### Example usage:
2285 /// ~~~{.cpp}
2286 /// // Deduce column type (this invocation needs jitting internally)
2287 /// auto stats0 = myDf.Stats("values");
2288 /// // Explicit column type
2289 /// auto stats1 = myDf.Stats<float>("values");
2290 /// ~~~
2291 ///
2292 template <typename V = RDFDetail::RInferredType>
// NOTE(review): listing line 2293 (the declaration introducing `value`) is absent from this listing — TODO restore.
2294 {
2295 ColumnNames_t columns;
// An empty `value` leaves `columns` empty; otherwise it holds the single requested column name.
2296 if (!value.empty()) {
2297 columns.emplace_back(std::string(value));
2298 }
// One column name is requested from GetValidatedColumnNames.
2299 const auto validColumnNames = GetValidatedColumnNames(1, columns);
// NOTE(review): listing line 2300, which opens the branch closed by `} else {` below, is absent from this listing.
// The first branch calls Fill without explicit column types, the second passes V explicitly.
2301 return Fill(TStatistic(), validColumnNames);
2302 } else {
2303 return Fill<V>(TStatistic(), validColumnNames);
2304 }
2305 }
2306
2307 ////////////////////////////////////////////////////////////////////////////
2308 /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2309 ///
2310 /// \tparam V The type of the value column
2311 /// \tparam W The type of the weight column
2312 /// \param[in] value The name of the column with the values to fill the statistics with.
2313 /// \param[in] weight The name of the column with the weights to fill the statistics with.
2314 /// \return the filled TStatistic object wrapped in a RResultPtr.
2315 ///
2316 /// ### Example usage:
2317 /// ~~~{.cpp}
2318 /// // Deduce column types (this invocation needs jitting internally)
2319 /// auto stats0 = myDf.Stats("values", "weights");
2320 /// // Explicit column types
2321 /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2322 /// ~~~
2323 ///
2324 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
// NOTE(review): listing line 2325 (the declaration introducing `value` and `weight`) is absent from this listing —
// TODO restore it from the original header.
2326 {
// Both names are always placed in the list, value first and weight second.
2327 ColumnNames_t columns{std::string(value), std::string(weight)};
// A template parameter left at RDFDetail::RInferredType means "infer this column type".
2328 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2329 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2330 const auto validColumnNames = GetValidatedColumnNames(2, columns);
2331 // We have 3 cases:
2332 // 1. Both types are inferred: we use Fill and let the jit kick in.
2333 // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2334 // 3. Both types are explicit: we invoke the fully compiled Fill method.
2335 if (vIsInferred && wIsInferred) {
2336 return Fill(TStatistic(), validColumnNames);
2337 } else if (vIsInferred != wIsInferred) {
// Exactly one type is explicit here. If V is the inferred one, the explicit type is the weight's (and vice
// versa), so the message names the columns accordingly.
2338 std::string error("The ");
2339 error += vIsInferred ? "weight " : "value ";
2340 error += "column type is explicit, while the ";
2341 error += vIsInferred ? "value" : "weight";
2342 error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2343 throw std::runtime_error(error);
2344 } else {
2345 return Fill<V, W>(TStatistic(), validColumnNames);
2346 }
2347 }
2348
2349 ////////////////////////////////////////////////////////////////////////////
2350 /// \brief Return the minimum of processed column values (*lazy action*).
2351 /// \tparam T The type of the branch/column.
2352 /// \param[in] columnName The name of the branch/column to be treated.
2353 /// \return the minimum value of the selected column wrapped in a RResultPtr.
2354 ///
2355 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2356 /// template specialization of this method.
2357 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2358 ///
2359 /// This action is *lazy*: upon invocation of this method the calculation is
2360 /// booked but not executed. Also see RResultPtr.
2361 ///
2362 /// ### Example usage:
2363 /// ~~~{.cpp}
2364 /// // Deduce column type (this invocation needs jitting internally)
2365 /// auto minVal0 = myDf.Min("values");
2366 /// // Explicit column type
2367 /// auto minVal1 = myDf.Min<double>("values");
2368 /// ~~~
2369 ///
2370 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2371 (the declaration introducing `columnName`) is absent from this listing — TODO restore.
2372 {
// An empty name yields an empty column list, otherwise a one-element list.
2373 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2374 using RetType_t = RDFDetail::MinReturnType_t<T>;
// The result starts at the largest value of RetType_t.
2375 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2376 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV);
2377 }
2378
2379 ////////////////////////////////////////////////////////////////////////////
2380 /// \brief Return the maximum of processed column values (*lazy action*).
2381 /// \tparam T The type of the branch/column.
2382 /// \param[in] columnName The name of the branch/column to be treated.
2383 /// \return the maximum value of the selected column wrapped in a RResultPtr.
2384 ///
2385 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2386 /// template specialization of this method.
2387 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2388 ///
2389 /// This action is *lazy*: upon invocation of this method the calculation is
2390 /// booked but not executed. Also see RResultPtr.
2391 ///
2392 /// ### Example usage:
2393 /// ~~~{.cpp}
2394 /// // Deduce column type (this invocation needs jitting internally)
2395 /// auto maxVal0 = myDf.Max("values");
2396 /// // Explicit column type
2397 /// auto maxVal1 = myDf.Max<double>("values");
2398 /// ~~~
2399 ///
2400 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2401 (the declaration introducing `columnName`) is absent from this listing — TODO restore.
2402 {
// An empty name yields an empty column list, otherwise a one-element list.
2403 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2404 using RetType_t = RDFDetail::MaxReturnType_t<T>;
// The result starts at the lowest value of RetType_t (lowest(), not min(), so this also holds for floating point).
2405 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2406 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV);
2407 }
2408
2409 ////////////////////////////////////////////////////////////////////////////
2410 /// \brief Return the mean of processed column values (*lazy action*).
2411 /// \tparam T The type of the branch/column.
2412 /// \param[in] columnName The name of the branch/column to be treated.
2413 /// \return the mean value of the selected column wrapped in a RResultPtr.
2414 ///
2415 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2416 /// template specialization of this method.
2417 ///
2418 /// This action is *lazy*: upon invocation of this method the calculation is
2419 /// booked but not executed. Also see RResultPtr.
2420 ///
2421 /// ### Example usage:
2422 /// ~~~{.cpp}
2423 /// // Deduce column type (this invocation needs jitting internally)
2424 /// auto meanVal0 = myDf.Mean("values");
2425 /// // Explicit column type
2426 /// auto meanVal1 = myDf.Mean<double>("values");
2427 /// ~~~
2428 ///
2429 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2430 (the declaration introducing `columnName`) is absent from this listing — TODO restore.
2431 {
// An empty name yields an empty column list, otherwise a one-element list.
2432 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The result is always stored as a double, initialised to 0, whatever T is.
2433 auto meanV = std::make_shared<double>(0);
2434 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV);
2435 }
2436
2437 ////////////////////////////////////////////////////////////////////////////
2438 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2439 /// \tparam T The type of the branch/column.
2440 /// \param[in] columnName The name of the branch/column to be treated.
2441 /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2442 ///
2443 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2444 /// template specialization of this method.
2445 ///
2446 /// This action is *lazy*: upon invocation of this method the calculation is
2447 /// booked but not executed. Also see RResultPtr.
2448 ///
2449 /// ### Example usage:
2450 /// ~~~{.cpp}
2451 /// // Deduce column type (this invocation needs jitting internally)
2452 /// auto stdDev0 = myDf.StdDev("values");
2453 /// // Explicit column type
2454 /// auto stdDev1 = myDf.StdDev<double>("values");
2455 /// ~~~
2456 ///
2457 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2458 (the declaration introducing `columnName`) is absent from this listing — TODO restore.
2459 {
// An empty name yields an empty column list, otherwise a one-element list.
2460 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The result is always stored as a double, initialised to 0, whatever T is.
2461 auto stdDeviationV = std::make_shared<double>(0);
2462 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV);
2463 }
2464
2465 // clang-format off
2466 ////////////////////////////////////////////////////////////////////////////
2467 /// \brief Return the sum of processed column values (*lazy action*).
2468 /// \tparam T The type of the branch/column.
2469 /// \param[in] columnName The name of the branch/column.
2470 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2471 /// \return the sum of the selected column wrapped in a RResultPtr.
2472 ///
2473 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2474 /// template specialization of this method.
2475 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2476 ///
2477 /// This action is *lazy*: upon invocation of this method the calculation is
2478 /// booked but not executed. Also see RResultPtr.
2479 ///
2480 /// ### Example usage:
2481 /// ~~~{.cpp}
2482 /// // Deduce column type (this invocation needs jitting internally)
2483 /// auto sum0 = myDf.Sum("values");
2484 /// // Explicit column type
2485 /// auto sum1 = myDf.Sum<double>("values");
2486 /// ~~~
2487 ///
2488 template <typename T = RDFDetail::RInferredType>
// NOTE(review): listing line 2489 (presumably the return type) is absent from this listing — TODO restore.
2490 Sum(std::string_view columnName = "",
2491 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2492 {
// An empty name yields an empty column list, otherwise a one-element list.
2493 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
// The result starts from a copy of `initValue` (a value-initialised SumReturnType_t<T> by default).
2494 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2495 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV);
2496 }
2497 // clang-format on
2498
2499 ////////////////////////////////////////////////////////////////////////////
2500 /// \brief Gather filtering statistics.
2501 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2502 ///
2503 /// Calling `Report` on the main `RDataFrame` object gathers stats for
2504 /// all named filters in the call graph. Calling this method on a
2505 /// stored chain state (i.e. a graph node different from the first) gathers
2506 /// the stats for all named filters in the chain section between the original
2507 /// `RDataFrame` and that node (included). Stats are gathered in the same
2508 /// order as the named filters have been added to the graph.
2509 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2510 /// effects cuts had.
2511 ///
2512 /// This action is *lazy*: upon invocation of
2513 /// this method the calculation is booked but not executed. See RResultPtr
2514 /// documentation.
2515 ///
2516 /// ### Example usage:
2517 /// ~~~{.cpp}
2518 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2519 /// auto cutReport = filtered.Report();
2520 /// cutReport->Print();
2521 /// ~~~
2522 ///
2524 {
// NOTE(review): several listing lines of this function are absent from this listing: 2523 (the declaration),
// 2531 (the condition guarding the assignment below), 2536 (presumably the `Action_t` alias used below) and
// 2539 (the remaining arguments of make_unique) — TODO restore them from the original header.
2525 bool returnEmptyReport = false;
2526 // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2527 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2528 // certainly does not contain named filters.
2529 // The number 4 takes into account the implicit columns for entry and slot number
2530 // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2532 returnEmptyReport = true;
2533
// `rep` is the report object returned to the caller; the helper receives it together with the proxied node and
// the empty-report flag.
2534 auto rep = std::make_shared<RCutFlowReport>();
2535 using Helper_t = RDFInternal::ReportHelper<Proxied>;
2537
// The action is created with an empty list of column names.
2538 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
2540
// Ownership of the action is moved into MakeResultPtr.
2541 return MakeResultPtr(rep, *fLoopManager, std::move(action));
2542 }
2543
2544 /////////////////////////////////////////////////////////////////////////////
2545 /// \brief Returns the names of the available columns.
2546 /// \return the container of column names.
2547 ///
2548 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2549 ///
2550 /// ### Example usage:
2551 /// ~~~{.cpp}
2552 /// auto colNames = d.GetColumnNames();
2553 /// // Print columns' names
2554 /// for (auto &&colName : colNames) std::cout << colName << std::endl;
2555 /// ~~~
2556 ///
2558 {
// NOTE(review): listing line 2557 (the declaration) is absent from this listing — TODO restore.
2559 // there could be duplicates between Redefined columns and columns in the data source
2560 std::unordered_set<std::string> allColumns;
2561
// Inserts a name only when IsInternalColumn returns false for it. It is applied to the registered names below,
// not to tree branches or data-source columns.
2562 auto addIfNotInternal = [&allColumns](std::string_view colName) {
2563 if (!RDFInternal::IsInternalColumn(colName))
2564 allColumns.emplace(colName);
2565 };
2566
// 1) Names known to the column register.
2567 auto definedColumns = fColRegister.GetNames();
2568
2569 std::for_each(definedColumns.begin(), definedColumns.end(), addIfNotInternal);
2570
// 2) Branch names of the tree, if the loop manager has one.
2571 auto tree = fLoopManager->GetTree();
2572 if (tree) {
2573 for (const auto &bName : RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false))
2574 allColumns.emplace(bName);
2575 }
2576
// 3) Data-source columns, skipping every name that starts with "R_rdf_sizeof".
2577 if (fDataSource) {
2578 for (const auto &s : fDataSource->GetColumnNames()) {
2579 if (s.rfind("R_rdf_sizeof", 0) != 0)
2580 allColumns.emplace(s);
2581 }
2582 }
2583
// The set has no defined order, so the names are copied out and sorted before being returned.
2584 ColumnNames_t ret(allColumns.begin(), allColumns.end());
2585 std::sort(ret.begin(), ret.end());
2586 return ret;
2587 }
2588
2589 /////////////////////////////////////////////////////////////////////////////
2590 /// \brief Return the type of a given column as a string.
2591 /// \return the type of the required column.
2592 ///
2593 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2594 ///
2595 /// ### Example usage:
2596 /// ~~~{.cpp}
2597 /// auto colType = d.GetColumnType("columnName");
2598 /// // Print column type
2599 /// std::cout << "Column columnName has type " << colType << std::endl;
2600 /// ~~~
2601 ///
// NOTE(review): the declaration line and the head of the final return expression are not visible
// in this listing -- confirm against the header.
2603 {
// From here on `col` is the column name after alias resolution.
2604 const auto col = fColRegister.ResolveAlias(std::string(column));
2605
// `define` is non-null only when `col` is a Define or an alias known to the column register.
2606 RDFDetail::RDefineBase *define =
2607 fColRegister.IsDefineOrAlias(col) ? fColRegister.GetDefines().at(col).get() : nullptr;
2608
2609 const bool convertVector2RVec = true;
2611 convertVector2RVec);
2612 }
2613
2614 /////////////////////////////////////////////////////////////////////////////
2615 /// \brief Return information about the dataframe.
2616 /// \return information about the dataframe as RDFDescription object
2617 ///
2618 /// This convenience function describes the dataframe and combines the following information:
2619 /// - Number of event loops run, see GetNRuns()
2620 /// - Number of total and defined columns, see GetColumnNames() and GetDefinedColumnNames()
2621 /// - Column names, see GetColumnNames()
2622 /// - Column types, see GetColumnType()
2623 /// - Number of processing slots, see GetNSlots()
2624 ///
2625 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2626 /// The result is dependent on the node from which this method is called, e.g. the list of
2627 /// defined columns returned by GetDefinedColumnNames().
2628 ///
2629 /// Please note that this is a convenience feature and the layout of the output can be subject
2630 /// to change and should be parsed via RDFDescription methods.
2631 ///
2632 /// ### Example usage:
2633 /// ~~~{.cpp}
2634 /// RDataFrame df(10);
2635 /// auto df2 = df.Define("x", "1.f").Define("s", "\"myStr\"");
2636 /// // Describe the dataframe
2637 /// df2.Describe().Print()
2638 /// df2.Describe().Print(/*shortFormat=*/true)
2639 /// std::cout << df2.Describe().AsString() << std::endl;
2640 /// std::cout << df2.Describe().AsString(/*shortFormat=*/true) << std::endl;
2641 /// ~~~
2642 ///
2644 {
2645 // Build set of defined column names to find later in all column names
2646 // the defined columns more efficiently
2647 const auto columnNames = GetColumnNames();
2648 std::set<std::string> definedColumnNamesSet;
2649 for (const auto &name : GetDefinedColumnNames())
2650 definedColumnNamesSet.insert(name);
2651
2652 // Get information for the metadata table
2653 const std::vector<std::string> metadataProperties = {"Columns in total", "Columns from defines",
2654 "Event loops run", "Processing slots"};
2655 const std::vector<std::string> metadataValues = {std::to_string(columnNames.size()),
2656 std::to_string(definedColumnNamesSet.size()),
2657 std::to_string(GetNRuns()), std::to_string(GetNSlots())};
2658
2659 // Set header for metadata table
2660 const auto columnWidthProperties = RDFInternal::GetColumnWidth(metadataProperties);
2661 // The column width of the values is required to make right-bound numbers and is equal
2662 // to the maximum of the string "Value" and all values to be put in this column.
2663 const auto columnWidthValues =
2664 std::max(std::max_element(metadataValues.begin(), metadataValues.end())->size(), static_cast<std::size_t>(5u));
2665 std::stringstream ss;
2666 ss << std::left << std::setw(columnWidthProperties) << "Property" << std::setw(columnWidthValues) << "Value\n"
2667 << std::setw(columnWidthProperties) << "--------" << std::setw(columnWidthValues) << "-----\n";
2668
2669 // Build metadata table
2670 // All numbers should be bound to the right and strings bound to the left.
2671 for (auto i = 0u; i < metadataProperties.size(); i++) {
2672 ss << std::left << std::setw(columnWidthProperties) << metadataProperties[i] << std::right
2673 << std::setw(columnWidthValues) << metadataValues[i] << '\n';
2674 }
2675 ss << '\n'; // put space between this and the next table
2676
2677 // Set header for columns table
2678 const auto columnWidthNames = RDFInternal::GetColumnWidth(columnNames);
2679 const auto columnTypes = GetColumnTypeNamesList(columnNames);
2680 const auto columnWidthTypes = RDFInternal::GetColumnWidth(columnTypes);
2681 ss << std::left << std::setw(columnWidthNames) << "Column" << std::setw(columnWidthTypes) << "Type"
2682 << "Origin\n"
2683 << std::setw(columnWidthNames) << "------" << std::setw(columnWidthTypes) << "----"
2684 << "------\n";
2685
2686 // Build columns table
2687 const auto nCols = columnNames.size();
2688 for (auto i = 0u; i < nCols; i++) {
2689 auto origin = "Dataset";
2690 if (definedColumnNamesSet.find(columnNames[i]) != definedColumnNamesSet.end())
2691 origin = "Define";
2692 ss << std::left << std::setw(columnWidthNames) << columnNames[i] << std::setw(columnWidthTypes)
2693 << columnTypes[i] << origin;
2694 if (i < nCols - 1)
2695 ss << '\n';
2696 }
2697 // Use the string returned from DescribeDataset() as the 'brief' description
2698 // Use the converted to string stringstream ss as the 'full' description
2699 return RDFDescription(DescribeDataset(), ss.str());
2700 }
2701
2702 /// \brief Returns the names of the filters created.
2703 /// \return the container of filters names.
2704 ///
2705 /// If called on a root node, all the filters in the computation graph will
2706 /// be printed. For any other node, only the filters upstream of that node.
2707 /// Filters without a name are printed as "Unnamed Filter"
2708 /// This is not an action nor a transformation, just a query to the RDataFrame object.
2709 ///
2710 /// ### Example usage:
2711 /// ~~~{.cpp}
2712 /// auto filtNames = d.GetFilterNames();
2713 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2714 /// ~~~
2715 ///
2716 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2717
2718 /// \brief Returns the names of the defined columns.
2719 /// \return the container of the defined column names.
2720 ///
2721 /// This is not an action nor a transformation, just a simple utility to
2722 /// get the columns names that have been defined up to the node.
2723 /// If no column has been defined, e.g. on a root node, it returns an
2724 /// empty collection.
2725 ///
2726 /// ### Example usage:
2727 /// ~~~{.cpp}
2728 /// auto defColNames = d.GetDefinedColumnNames();
2729 /// // Print defined columns' names
2730 /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
2731 /// ~~~
2732 ///
2734 {
2735 ColumnNames_t definedColumns;
2736
2737 auto columns = fColRegister.GetDefines();
2738
2739 for (const auto &column : columns) {
2740 if (!RDFInternal::IsInternalColumn(column.first))
2741 definedColumns.emplace_back(column.first);
2742 }
2743
2744 return definedColumns;
2745 }
2746
2747 /// \brief Return a descriptor for the systematic variations registered in this branch of the computation graph.
2748 ///
2749 /// This is not an action nor a transformation, just a simple utility to
2750 /// inspect the systematic variations that have been registered with Vary() up to this node.
2751 /// When called on the root node, it returns an empty descriptor.
2752 ///
2753 /// ### Example usage:
2754 /// ~~~{.cpp}
2755 /// auto variations = d.GetVariations();
2756 /// variations.Print();
2757 /// ~~~
2758 ///
2760
2761 /// \brief Checks if a column is present in the dataset.
2762 /// \return true if the column is available, false otherwise
2763 ///
2764 /// This method checks if a column is part of the input ROOT dataset, has
2765 /// been defined or can be provided by the data source.
2766 ///
2767 /// Example usage:
2768 /// ~~~{.cpp}
2769 /// ROOT::RDataFrame base(1);
2770 /// auto rdf = base.Define("definedColumn", [](){return 0;});
2771 /// rdf.HasColumn("definedColumn"); // true: we defined it
2772 /// rdf.HasColumn("rdfentry_"); // true: it's always there
2773 /// rdf.HasColumn("foo"); // false: it is not there
2774 /// ~~~
2776 {
2777 if (fColRegister.IsDefineOrAlias(columnName))
2778 return true;
2779
2780 if (auto tree = fLoopManager->GetTree()) {
2781 const auto &branchNames = fLoopManager->GetBranchNames();
2782 const auto branchNamesEnd = branchNames.end();
2783 if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
2784 return true;
2785 }
2786
2787 if (fDataSource && fDataSource->HasColumn(columnName))
2788 return true;
2789
2790 return false;
2791 }
2792
2793 /// \brief Gets the number of data processing slots.
2794 /// \return The number of data processing slots used by this RDataFrame instance
2795 ///
2796 /// This method returns the number of data processing slots used by this RDataFrame
2797 /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
2798 ///
2799 /// Example usage:
2800 /// ~~~{.cpp}
2801 /// ROOT::EnableImplicitMT(6)
2802 /// ROOT::RDataFrame df(1);
2803 /// std::cout << df.GetNSlots() << std::endl; // prints "6"
2804 /// ~~~
2805 unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
2806
2807 /// \brief Gets the number of event loops run.
2808 /// \return The number of event loops run by this RDataFrame instance
2809 ///
2810 /// This method returns the number of events loops run so far by this RDataFrame instance.
2811 ///
2812 /// Example usage:
2813 /// ~~~{.cpp}
2814 /// ROOT::RDataFrame df(1);
2815 /// std::cout << df.GetNRuns() << std::endl; // prints "0"
2816 /// df.Sum("rdfentry_").GetValue(); // trigger the event loop
2817 /// std::cout << df.GetNRuns() << std::endl; // prints "1"
2818 /// df.Sum("rdfentry_").GetValue(); // trigger another event loop
2819 /// std::cout << df.GetNRuns() << std::endl; // prints "2"
2820 /// ~~~
2821 unsigned int GetNRuns() const { return fLoopManager->GetNRuns(); }
2822
2823 // clang-format off
2824 ////////////////////////////////////////////////////////////////////////////
2825 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2826 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
/// \tparam MergeFun The type of the merger callable. Automatically deduced.
2827 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2828 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2829 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2830 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2831 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2832 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
2833 /// \return the result of the aggregation wrapped in a RResultPtr.
2834 ///
2835 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2836 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2837 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2838 /// the value of the column columnName.
2839 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2840 /// Otherwise the signature of aggregator must be `void(U&,T)`.
2841 ///
2842 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2843 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2844 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2845 ///
2846 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2847 ///
2848 /// Example usage:
2849 /// ~~~{.cpp}
2850 /// auto aggregator = [](double acc, double x) { return acc * x; };
2851 /// ROOT::EnableImplicitMT();
2852 /// // If multithread is enabled, the aggregator function will be called by more threads
2853 /// // and will produce a vector of partial accumulators.
2854 /// // The merger function performs the final aggregation of these partial results.
2855 /// auto merger = [](std::vector<double> &accumulators) {
2856 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2857 /// accumulators[0] *= accumulators[i];
2858 /// }
2859 /// };
2860 ///
2861 /// // The accumulator is initialized at this value by every thread.
2862 /// double initValue = 1.;
2863 ///
2864 /// // Multiplies all elements of the column "x"
2865 /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2866 /// ~~~
2867 // clang-format on
2868 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2869 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2870 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2871 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2872 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2873 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2874 {
2875 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
// An empty columnName is forwarded as an empty column list; exactly one column is requested below.
2876 const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2877
2878 const auto validColumnNames = GetValidatedColumnNames(1, columns);
2879 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2880
// The accumulator starts as a copy of aggIdentity and is shared by the helper and the returned result pointer.
2881 auto accObjPtr = std::make_shared<U>(aggIdentity);
2882 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
// NOTE(review): the `Action_t` alias and the tail of the make_unique argument list are not visible
// in this listing -- confirm against the header.
2884 auto action = std::make_unique<Action_t>(
2885 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2887 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2888 }
2889
2890 // clang-format off
2891 ////////////////////////////////////////////////////////////////////////////
2892 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2893 /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
/// \tparam MergeFun The type of the merger callable. Automatically deduced.
2894 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2895 /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2896 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2897 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2898 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2899 /// \return the result of the aggregation wrapped in a RResultPtr.
2900 ///
2901 /// See previous Aggregate overload for more information.
2902 // clang-format on
2903 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2904 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2905 typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2906 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2907 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2908 {
// The identity value is a default-constructed U (see the call below); the static_assert turns a
// non-default-constructible U into a readable error.
// NOTE(review): the condition line of the static_assert is not visible in this listing -- confirm.
2909 static_assert(
2911 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2912 return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2913 }
2914
2915 // clang-format off
2916 ////////////////////////////////////////////////////////////////////////////
2917 /// \brief Book execution of a custom action using a user-defined helper object.
2918 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present.
2919 /// \tparam OtherColumns A list of the types of the other columns used by this action
2920 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2921 /// \param[in] helper The Action Helper to be scheduled.
2922 /// \param[in] columns The names of the columns on which the helper acts.
2923 /// \return the result of the helper wrapped in a RResultPtr.
2924 ///
2925 /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2926 /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2927 /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2928 ///
2929 /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2930 /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2931 /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2932 /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2933 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2934 /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2935 /// the requested columns for the particular entry being processed.
2936 /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2937 /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2938 /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2939 /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations.
2940 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2941 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2942 /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2943 /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2944 /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2945 /// method concurrently, but will always pass different 'slot' numbers.
2946 /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2947 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2948 /// before Initialize(), because the RResultPtr is constructed before the event loop is started.
2949 /// * ROOT::RDF::SampleCallback_t GetSampleCallback(): optional. If present, it must return a callable with the
2950 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
2951 /// of every sample, as with DefinePerSample().
2952 ///
2953 /// In case this is called without specifying column types, jitting is used,
2954 /// and the Helper class needs to be known to the interpreter.<br>
2955 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2956 ///
2957 /// ### Examples
2958 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.<br>
2959 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
2960 ///
2961 // clang-format on
2962
// NOTE(review): the declaration line, the head of the static check on HelperT and the condition that
// selects between the two return statements are not visible in this listing -- confirm against the header.
2963 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
2965 {
2966 using HelperT = std::decay_t<Helper>;
2967 // TODO add more static sanity checks on Helper
2970 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2971
// The helper is moved/copied into shared ownership; its result pointer is obtained before any event loop runs.
2972 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
2973 auto resPtr = hPtr->GetResultPtr();
2974
2976 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2977 } else {
// The number of requested columns is passed explicitly as columns.size().
2978 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2979 columns.size());
2980 }
2981 }
2982
2983 ////////////////////////////////////////////////////////////////////////////
2984 /// \brief Provides a representation of the columns in the dataset.
2985 /// \tparam ColumnTypes variadic list of branch/column types.
2986 /// \param[in] columnList Names of the columns to be displayed.
2987 /// \param[in] nRows Number of events for each column to be displayed.
2988 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2989 /// \return the `RDisplay` instance wrapped in a RResultPtr.
2990 ///
2991 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2992 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
2993 /// return a complete version through `RDisplay::AsString()`.
2994 ///
2995 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
2996 /// RResultPtr.
2997 ///
2998 /// Example usage:
2999 /// ~~~{.cpp}
3000 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
3001 /// auto d1 = rdf.Display("");
3002 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
3003 /// auto d2 = d.Display({"x", "y"}, 128);
3004 /// // Printing the short representations, the event loop will run
3005 /// d1->Print();
3006 /// d2->Print();
3007 /// ~~~
3008 template <typename... ColumnTypes>
3009 RResultPtr<RDisplay>
3010 Display(const ColumnNames_t &columnList, int nRows = 5, size_t nMaxCollectionElements = 10)
3011 {
3012 CheckIMTDisabled("Display");
3013 auto newCols = columnList;
3014 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3015 auto displayer = std::make_shared<RDFInternal::RDisplay>(newCols, GetColumnTypeNamesList(newCols), nRows,
3016 nMaxCollectionElements);
3017 // Need to add ULong64_t type corresponding to the first column rdfentry_
3018 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(std::move(newCols), displayer,
3019 displayer);
3020 }
3021
3022 ////////////////////////////////////////////////////////////////////////////
3023 /// \brief Provides a representation of the columns in the dataset.
3024 /// \param[in] columnList Names of the columns to be displayed.
3025 /// \param[in] nRows Number of events for each column to be displayed.
3026 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3027 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3028 ///
3029 /// This overload automatically infers the column types.
3030 /// See the previous overloads for further details.
3031 ///
3032 /// Invoked when no types are specified to Display
3034 Display(const ColumnNames_t &columnList, int nRows = 5, size_t nMaxCollectionElements = 10)
3035 {
3036 CheckIMTDisabled("Display");
3037 auto newCols = columnList;
3038 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3039 auto displayer = std::make_shared<RDFInternal::RDisplay>(newCols, GetColumnTypeNamesList(newCols), nRows,
3040 nMaxCollectionElements);
3041 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(std::move(newCols), displayer,
3042 displayer, columnList.size() + 1);
3043 }
3044
3045 ////////////////////////////////////////////////////////////////////////////
3046 /// \brief Provides a representation of the columns in the dataset.
3047 /// \param[in] columnNameRegexp A regular expression to select the columns.
3048 /// \param[in] nRows Number of events for each column to be displayed.
3049 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3050 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3051 ///
3052 /// The existing columns are matched against the regular expression. If the string provided
3053 /// is empty, all columns are selected.
3054 /// See the previous overloads for further details.
3056 Display(std::string_view columnNameRegexp = "", int nRows = 5, size_t nMaxCollectionElements = 10)
3057 {
3058 const auto columnNames = GetColumnNames();
3059 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
3060 return Display(selectedColumns, nRows, nMaxCollectionElements);
3061 }
3062
3063 ////////////////////////////////////////////////////////////////////////////
3064 /// \brief Provides a representation of the columns in the dataset.
3065 /// \param[in] columnList Names of the columns to be displayed.
3066 /// \param[in] nRows Number of events for each column to be displayed.
3067 /// \param[in] nMaxCollectionElements Number of maximum elements in collection.
3068 /// \return the `RDisplay` instance wrapped in a RResultPtr.
3069 ///
3070 /// See the previous overloads for further details.
3071 RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, int nRows = 5,
3072 size_t nMaxCollectionElements = 10)
3073 {
3074 ColumnNames_t selectedColumns(columnList);
3075 return Display(selectedColumns, nRows, nMaxCollectionElements);
3076 }
3077
3078private:
// Registers the implicit columns "rdfentry_" and "rdfslot_" in the column register, plus the aliases
// "tdfentry_" and "tdfslot_" that refer to them.
// NOTE(review): the declaration line and the tails of the two make_shared calls are not visible in this
// listing -- confirm against the header.
3080 {
3081 // Entry number column
3082 const std::string entryColName = "rdfentry_";
3083 const std::string entryColType = "ULong64_t";
// The generator receives (slot, entry) and returns the entry number unchanged.
3084 auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
3085 using NewColEntry_t = RDFDetail::RDefine<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
3086
3087 auto entryColumn = std::make_shared<NewColEntry_t>(entryColName, entryColType, std::move(entryColGen),
3089 fColRegister.AddDefine(entryColumn);
3090
3091 // Slot number column
3092 const std::string slotColName = "rdfslot_";
3093 const std::string slotColType = "unsigned int";
// The generator receives the slot and returns it unchanged.
3094 auto slotColGen = [](unsigned int slot) { return slot; };
3095 using NewColSlot_t = RDFDetail::RDefine<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
3096
3097 auto slotColumn = std::make_shared<NewColSlot_t>(slotColName, slotColType, std::move(slotColGen), ColumnNames_t{},
3099 fColRegister.AddDefine(slotColumn);
3100
// tdfentry_ and tdfslot_ are aliases of the two columns registered above.
3101 fColRegister.AddAlias("tdfentry_", entryColName);
3102 fColRegister.AddAlias("tdfslot_", slotColName);
3103 }
3104
3105 std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
3106 {
3107 std::vector<std::string> types;
3108
3109 for (auto column : columnList) {
3110 types.push_back(GetColumnType(column));
3111 }
3112 return types;
3113 }
3114
// Throws a std::runtime_error whose message starts with callerName and states that multi-thread
// execution is not supported for that call.
// NOTE(review): the declaration line and the condition guarding the throw are not visible in this listing;
// presumably the error is raised only when implicit multi-threading is enabled -- confirm against the header.
3116 {
3118 std::string error(callerName);
3119 error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
3120 throw std::runtime_error(error);
3121 }
3122 }
3123
3124 /// Create RAction object, return RResultPtr for the action
3125 /// Overload for the case in which all column types were specified (no jitting).
3126 /// For most actions, `r` and `helperArg` will refer to the same object, because the only argument to forward to
3127 /// the action helper is the result value itself. We need the distinction for actions such as Snapshot or Cache,
3128 /// for which the constructor arguments of the action helper are different from the returned value.
/// The last (unnamed) int parameter is ignored by this overload: the number of columns is sizeof...(ColTypes).
// NOTE(review): the return-type line and one statement after GetValidatedColumnNames are not visible
// in this listing -- confirm against the header.
3129 template <typename ActionTag, typename... ColTypes, typename ActionResultType,
3130 typename HelperArgType = ActionResultType,
3131 std::enable_if_t<!RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
3133 CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
3134 const std::shared_ptr<HelperArgType> &helperArg, const int /*nColumns*/ = -1)
3135 {
// All column types are known at compile time, so the column count is too.
3136 constexpr auto nColumns = sizeof...(ColTypes);
3137
3138 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3140
3141 const auto nSlots = fLoopManager->GetNSlots();
3142
3143 auto action = RDFInternal::BuildAction<ColTypes...>(validColumnNames, helperArg, nSlots, fProxiedPtr, ActionTag{},
3144 fColRegister);
// The sample callback of the action is registered with the loop manager before ownership of the action is handed over.
3145 fLoopManager->AddSampleCallback(action->GetSampleCallback());
3146 return MakeResultPtr(r, *fLoopManager, std::move(action));
3147 }
3148
3149 /// Create RAction object, return RResultPtr for the action
3150 /// Overload for the case in which one or more column types were not specified (RTTI + jitting).
3151 /// This overload has a `nColumns` optional argument. If present, the number of required columns for
3152 /// this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(ColTypes).
3153 template <typename ActionTag, typename... ColTypes, typename ActionResultType,
3154 typename HelperArgType = ActionResultType,
3155 std::enable_if_t<RDFInternal::RNeedJitting<ColTypes...>::value, int> = 0>
3156 RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r,
3157 const std::shared_ptr<HelperArgType> &helperArg, const int nColumns = -1)
3158 {
3159 auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(ColTypes));
3160
3161 const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
3162 const unsigned int nSlots = fLoopManager->GetNSlots();
3163
3164 auto *tree = fLoopManager->GetTree();
3165 auto *helperArgOnHeap = RDFInternal::MakeSharedOnHeap(helperArg);
3166
3167 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3168 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
3169 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister, fDataSource);
3170
3171 const auto jittedAction = std::make_shared<RDFInternal::RJittedAction>(
3172 *fLoopManager, validColumnNames, fColRegister, fProxiedPtr->GetVariations());
3173 auto jittedActionOnHeap = RDFInternal::MakeWeakOnHeap(jittedAction);
3174
3175 auto toJit =
3176 RDFInternal::JitBuildAction(validColumnNames, upcastNodeOnHeap, typeid(HelperArgType), typeid(ActionTag),
3177 helperArgOnHeap, tree, nSlots, fColRegister, fDataSource, jittedActionOnHeap);
3178 fLoopManager->ToJitExec(toJit);
3179 return MakeResultPtr(r, *fLoopManager, std::move(jittedAction));
3180 }
3181
// Shared implementation used to add a new column computed by the callable `expression`: an RDefine is
// created for it, registered in a column register and a new RInterface on the same node is returned.
// `where` names the calling method; values starting with "Redefine" take the else-branch of the first check.
// NOTE(review): the return-type line, the bodies of both branches of the first `if`, the tails of the two
// type aliases, the tail of the make_shared call and the declaration of `newCols` are not visible in this
// listing -- confirm against the header.
3182 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
3184 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
3185 {
3186 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
3190 } else {
3194 }
3195
3196 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
3197 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
3199 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
3201
// The number of input columns is the size of the column type list derived from the callable's arguments.
3202 constexpr auto nColumns = ColTypes_t::list_size;
3203
3204 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3205 CheckAndFillDSColumns(validColumnNames, ColTypes_t());
3206
3207 // Declare return type to the interpreter, for future use by jitted actions
3208 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3209 if (retTypeName.empty()) {
3210 // The type is not known to the interpreter.
3211 // We must not error out here: a placeholder type name built from the demangled type is recorded
// instead (presumably an error is raised only if/when this column is used in jitted code).
3212 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3213 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3214 }
3215
3216 using NewCol_t = RDFDetail::RDefine<F, DefineType>;
3217 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
3219
3221 newCols.AddDefine(newColumn);
3222
// The new interface wraps the same node; only the column register (now containing the new column) differs.
3223 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
3224
3225 return newInterface;
3226 }
3228 // This overload is chosen when the callable F passed to Define or DefineSlot is not convertible to
3229 // std::string and its return type is not default-constructible (void included). It simply fires a compile-time
3230 // error. This is preferable to a static_assert in the main `Define` overload because this way compilation of
// `Define` has no way to continue after the error is raised.
// NOTE(review): the line carrying the function name and parameter list is absent from this listing.
3231 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
3232 bool IsFStringConv = std::is_convertible<F, std::string>::value,
3233 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3234 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3236 {
// Always false when this overload is selected, so the assertion fires on instantiation.
3237 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3238 "Error in `Define`: type returned by expression is not default-constructible");
3239 return *this; // never reached
3240 }
3241
/// Implementation of Snapshot: books a Snapshot action for the given columns and returns its RResultPtr.
/// \param[in] columnList Names of the columns to write; array-size columns are filtered out first.
/// \param[in] options Snapshot options; `fLazy` decides whether the result is dereferenced before returning.
/// NOTE(review): some lines are absent from this listing (the start of the signature with the tree-name and
/// file-name parameters, and lines 3250-3251 and 3260 of the original file).
3242 template <typename... ColumnTypes>
3244 const ColumnNames_t &columnList, const RSnapshotOptions &options)
3245 {
3246 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3247
// The number of template types must be compatible with the number of remaining columns.
3248 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
3249 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
3252
// Split the full tree name into the tree name and the directory name.
3253 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
3254 const auto &treename = parsedTreePath.fTreeName;
3255 const auto &dirname = parsedTreePath.fDirName;
3256
3257 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3258 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
3259
// The result object is a new RDataFrame constructed from the output tree name, file name and columns.
3261 auto newRDF = std::make_shared<ROOT::RDataFrame>(fullTreeName, filename, validCols);
3262
3263 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs);
3264
// Non-lazy snapshots dereference the result here (presumably this is what triggers the event loop -- confirm).
3265 if (!options.fLazy)
3266 *resPtr;
3267 return resPtr;
3268 }
3269
3270 ////////////////////////////////////////////////////////////////////////////
3271 /// \brief Implementation of cache.
/// Books one Take per requested column, builds an RLazyDS from the (column name, Take result) pairs and
/// returns an RInterface on a new RLoopManager that owns that data source.
/// \tparam ColTypes Types of the columns to cache; they must be copy constructible (static_assert below).
/// \tparam S Index sequence used to pair each column name with its type.
/// \param[in] columnList Names of the columns to cache; array-size columns are filtered out first.
/// NOTE(review): line 3279 of the original file (the initializer of `areCopyConstructible`) is absent here.
3272 template <typename... ColTypes, std::size_t... S>
3273 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3274 {
// NOTE(review): the caller label passed here is "Snapshot" although this is the Cache implementation;
// if the label is used in diagnostics it will name the wrong method -- confirm and consider "Cache".
3275 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3276
3277 // Check at compile time that the columns types are copy constructible
3278 constexpr bool areCopyConstructible =
3280 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3281
3282 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
3283
// One Take result per column; the tuple keeps them available while the data source is constructed.
3284 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3285 auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3286 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3287
3288 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3289
3290 return cachedRDF;
3291 }
3292
/// Books a `Book` action with an empty column list and zero required columns.
/// The trailing return type makes this overload viable only when `hPtr->Exec(0u)` is a valid expression,
/// i.e. when the helper's Exec can be called with the slot number alone.
/// NOTE(review): line 3296 of the original file (the last parameter) is absent from this listing.
3293 template <typename Helper, typename ActionResultType>
3294 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3295 const std::shared_ptr<Helper> &hPtr,
3297 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3298 {
3299 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, 0u);
3300 }
3301
/// Overload that accepts any trailing arguments (`Others...`) and never books anything.
/// It ignores its arguments and always throws.
/// \throws std::logic_error Always; the message includes typeid(Helper).name().
3302 template <typename Helper, typename ActionResultType, typename... Others>
3303 RResultPtr<ActionResultType>
3304 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3305 const std::shared_ptr<Helper>& /*hPtr*/,
3306 Others...)
3307 {
3308 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3309 "columns! The action helper type was ") +
3310 typeid(Helper).name());
// Unreachable: the throw above always exits the function.
3311 return {};
3312 }
3313
/// Validates the arguments of a Vary call.
/// \tparam RetType Return type of the Vary expression; must be an RVec (enforced by a static_assert).
/// \param[in] colNames Names of the columns being varied; must not be empty.
/// \param[in] variationTags Tags of the variations; must not be empty.
/// \param[in] variationName Name of the variation; must be non-empty and is checked by CheckValidCppVarName.
/// \throws std::runtime_error If the varied values have a different type than the nominal column, if several
///         columns of different types are varied together, or if a multi-column expression does not return
///         an RVec of RVecs.
/// \throws std::logic_error If the same column name appears more than once in `colNames`.
/// NOTE(review): lines 3323-3324 (body of the loop over colNames) and 3331 (definition of `hasInnerRVec`)
/// of the original file are absent from this listing.
3314 template <typename RetType>
3315 void SanityChecksForVary(const std::vector<std::string> &colNames, const std::vector<std::string> &variationTags,
3316 std::string_view variationName)
3317 {
3318 R__ASSERT(variationTags.size() > 0 && "Must have at least one variation.");
3319 R__ASSERT(colNames.size() > 0 && "Must have at least one varied column.");
3320 R__ASSERT(!variationName.empty() && "Must provide a variation name.");
3321
3322 for (auto &colName : colNames) {
3325 }
3326 RDFInternal::CheckValidCppVarName(variationName, "Vary");
3327
3328 static_assert(RDFInternal::IsRVec<RetType>::value, "Vary expressions must return an RVec.");
3329
3330 if (colNames.size() > 1) { // we are varying multiple columns simultaneously, RetType is RVec<RVec<T>>
3332 if (!hasInnerRVec)
3333 throw std::runtime_error("This Vary call is varying multiple columns simultaneously but the expression "
3334 "does not return an RVec of RVecs.");
3335
// All varied columns must share one type: every entry after the first is compared with colTypes[0].
3336 auto colTypes = GetColumnTypeNamesList(colNames);
3337 auto allColTypesEqual =
3338 std::all_of(colTypes.begin() + 1, colTypes.end(), [&](const std::string &t) { return t == colTypes[0]; });
3339 if (!allColTypesEqual)
3340 throw std::runtime_error("Cannot simultaneously vary multiple columns of different types.");
3341
3342 const auto &innerTypeID = typeid(RDFInternal::InnerValueType_t<RetType>);
3343
// For a Define'd column the expected type comes from the define itself; otherwise it is looked up
// from the column's type name.
3344 const auto &definesMap = fColRegister.GetDefines();
3345 for (auto i = 0u; i < colTypes.size(); ++i) {
3346 const auto it = definesMap.find(colNames[i]);
3347 const auto &expectedTypeID =
3348 it == definesMap.end() ? RDFInternal::TypeName2TypeID(colTypes[i]) : it->second->GetTypeId();
3349 if (innerTypeID != expectedTypeID)
3350 throw std::runtime_error("Varied values for column \"" + colNames[i] + "\" have a different type (" +
3351 RDFInternal::TypeID2TypeName(innerTypeID) + ") than the nominal value (" +
3352 colTypes[i] + ").");
3353 }
3354 } else { // we are varying a single column, RetType is RVec<T>
3355 const auto &retTypeID = typeid(typename RetType::value_type);
3356 const auto &colName = colNames[0]; // we have only one element in there
3357 const auto &definesMap = fColRegister.GetDefines();
3358 const auto it = definesMap.find(colName);
3359 const auto &expectedTypeID =
3360 it == definesMap.end() ? RDFInternal::TypeName2TypeID(GetColumnType(colName)) : it->second->GetTypeId();
3361 if (retTypeID != expectedTypeID)
3362 throw std::runtime_error("Varied values for column \"" + colName + "\" have a different type (" +
3363 RDFInternal::TypeID2TypeName(retTypeID) + ") than the nominal value (" +
3364 GetColumnType(colName) + ").");
3365 }
3366
3367 // when varying multiple columns, they must be different columns
3368 if (colNames.size() > 1) {
// A std::set drops duplicates, so a size mismatch means some name was repeated.
3369 std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
3370 if (uniqueCols.size() != colNames.size())
3371 throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
3372 }
3373 }
3374
3375protected:
/// Constructs an RInterface from its four members.
/// \param[in] proxied Shared pointer to the graph node; copied into fProxiedPtr.
/// \param[in] lm Loop manager; its address is stored in fLoopManager.
/// \param[in] columns Column register; copied into fColRegister.
/// \param[in] ds Pointer to the data source, stored as-is in fDataSource (it is compared against nullptr
///               before use elsewhere in this class).
3376 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &columns,
3377 RDataSource *ds)
3378 : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fColRegister(columns)
3379 {
3380 }
3381
3383
/// Returns a const reference to fProxiedPtr, the shared pointer to the proxied graph node (no copy is made).
3384 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3385
/// Member helper used by the action/define implementations to obtain the column names to operate on.
/// NOTE(review): the body (line 3388 of the original file) is absent from this listing; presumably it
/// forwards to RDFInternal::GetValidatedColumnNames with this node's loop manager, column register and
/// data source -- confirm against the original file.
3386 ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
3387 {
3389 }
3390
/// When this node has a data source, passes the validated column names and their type list to
/// RDFInternal::AddDSColumns; when fDataSource is null, does nothing.
/// NOTE(review): the signature line (3392 of the original file) is absent from this listing.
3391 template <typename... ColumnTypes>
3393 {
3394 if (fDataSource != nullptr)
3395 RDFInternal::AddDSColumns(validCols, *fLoopManager, *fDataSource, typeList, fColRegister);
3396 }
3397};
3398
3399} // namespace RDF
3400
3401} // namespace ROOT
3402
3403#endif // ROOT_RDF_TINTERFACE
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
unsigned int UInt_t
Definition: RtypesCore.h:46
unsigned long long ULong64_t
Definition: RtypesCore.h:81
#define R__ASSERT(e)
Definition: TError.h:118
constexpr Int_t kError
Definition: TError.h:46
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize id
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition: TGX11.cxx:110
Base class for action helpers, see RInterface::Book() for more information.
Definition: RActionImpl.hxx:26
The head node of a RDF computation graph.
ULong64_t GetNEmptyEntries() const
const ColumnNames_t & GetBranchNames()
Return all valid TTree::Branch names (caching results for subsequent calls).
void ToJitExec(const std::string &) const
void AddSampleCallback(ROOT::RDF::SampleCallback_t &&callback)
unsigned int GetNRuns() const
void Run()
Start the event loop with a different mechanism depending on IMT/no IMT, data source/no data source.
RDataSource * GetDataSource() const
unsigned int GetNSlots() const
void Jit()
Add RDF nodes that require just-in-time compilation to the computation graph.
Helper class that provides the operation graph nodes.
A RDataFrame node that produces a result.
Definition: RAction.hxx:54
A binder for user-defined columns and aliases.
bool IsDefineOrAlias(std::string_view name) const
Check if the provided name is tracked in the names list.
void AddDefine(const std::shared_ptr< RDFDetail::RDefineBase > &column)
Add a new defined column.
void AddVariation(const std::shared_ptr< RVariationBase > &variation)
Register a new systematic variation.
std::string ResolveAlias(std::string_view alias) const
Return the actual column name that the alias resolves to.
const VariationsMap_t & GetVariations() const
Return the multimap of systematic variations, see fVariations.
const DefinesMap_t & GetDefines() const
Return a map of pointers to the defined columns.
ColumnNames_t GetNames() const
Return the list of the names of the defined columns (Defines + Aliases).
void AddAlias(std::string_view alias, std::string_view colName)
Add a new alias to the ledger.
A DFDescription contains useful information about a given RDataFrame computation graph.
RDataSource defines an API that RDataFrame can use to read arbitrary data formats.
virtual bool HasColumn(std::string_view colName) const =0
Checks if the dataset has a certain column.
virtual std::string GetLabel()
Return a string representation of the datasource type.
virtual const std::vector< std::string > & GetColumnNames() const =0
Returns a reference to the collection of the dataset's column names.
The public interface to the RDataFrame federation of classes.
Definition: RInterface.hxx:104
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RInterface(const RInterface &)=default
Copy-ctor for RInterface.
RResultPtr<::TH1D > Histo1D(std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.})
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RResultPtr<::TH2D > Histo2D(const TH2DModel &model)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a one-dimensional profile (lazy action).
RResultPtr<::THnD > HistoND(const THnDModel &model, const ColumnNames_t &columnList)
Fill and return an N-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::string_view columnNameRegexp="", const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< TStatistic > Stats(std::string_view value="")
Return a TStatistic object, filled once per event (lazy action).
RLoopManager * GetLoopManager() const
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column using auto-generated variation tags.
Definition: RInterface.hxx:744
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:864
std::string DescribeDataset() const
Definition: RInterface.hxx:126
RResultPtr<::TGraph > Graph(std::string_view x="", std::string_view y="")
Fill and return a TGraph object (lazy action).
RResultPtr< ActionResultType > CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &, const std::shared_ptr< Helper > &, Others...)
RInterface< Proxied, DS_t > DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot.
Definition: RInterface.hxx:419
RResultPtr< double > StdDev(std::string_view columnName="")
Return the unbiased standard deviation of processed column values (lazy action).
std::enable_if_t< std::is_default_constructible< RetType >::value, RInterface< Proxied, DS_t > > DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
unsigned int GetNSlots() const
Gets the number of data processing slots.
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, F expression)
Define a new column that is updated when the input sample changes.
Definition: RInterface.hxx:608
RInterface & operator=(RInterface &&)=default
Move-assignment operator for RInterface.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns using auto-generated tags.
Definition: RInterface.hxx:814
void ForeachSlot(F f, const ColumnNames_t &columns={})
Execute a user-defined function requiring a processing slot index on each entry (instant action).
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int nColumns=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which one or more co...
RInterface< Proxied, DS_t > Vary(std::string_view colName, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:846
RResultPtr< RDisplay > Display(std::initializer_list< std::string > columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RInterface< Proxied, DS_t > Define(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column.
Definition: RInterface.hxx:390
RResultPtr< TStatistic > Stats(std::string_view value, std::string_view weight)
Return a TStatistic object, filled once per event (lazy action).
RInterface< Proxied, DS_t > Redefine(std::string_view name, std::string_view expression)
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:557
auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr< ActionResultType > &resPtr, const std::shared_ptr< Helper > &hPtr, TTraits::TypeList< RDFDetail::RInferredType >) -> decltype(hPtr->Exec(0u), RResultPtr< ActionResultType >{})
RDataSource * fDataSource
Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the...
Definition: RInterface.hxx:121
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, std::size_t nVariations, std::string_view variationName)
Register systematic variations for one or more existing columns.
Definition: RInterface.hxx:890
RResultPtr< RDisplay > Display(std::string_view columnNameRegexp="", int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name="", std::string_view v2Name="")
Fill and return a two-dimensional histogram (lazy action).
RResultPtr< RInterface< RLoopManager > > SnapshotImpl(std::string_view fullTreeName, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options)
RResultPtr< ActionResultType > CreateAction(const ColumnNames_t &columns, const std::shared_ptr< ActionResultType > &r, const std::shared_ptr< HelperArgType > &helperArg, const int=-1)
Create RAction object, return RResultPtr for the action. Overload for the case in which all column typ...
ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< RLoopManager > &proxied)
Build a RInterface from a RLoopManager.
Definition: RInterface.hxx:226
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const std::initializer_list< std::string > &columns)
Append a filter to the call graph.
Definition: RInterface.hxx:326
RInterface< Proxied, DS_t > DefinePerSample(std::string_view name, std::string_view expression)
Define a new column that is updated when the input sample changes.
Definition: RInterface.hxx:674
RResultPtr< double > Mean(std::string_view columnName="")
Return the mean of processed column values (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, std::initializer_list< std::string > columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RInterface< Proxied, DS_t > Alias(std::string_view alias, std::string_view columnName)
Allow referring to a column by a different name.
Definition: RInterface.hxx:964
RInterface< RLoopManager > Cache(const ColumnNames_t &columnList)
Save selected columns in memory.
RInterface< Proxied, DS_t > Redefine(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:499
RInterface< RLoopManager > Cache(std::string_view columnNameRegexp="")
Save selected columns in memory.
RResultPtr< typename std::decay_t< Helper >::Result_t > Book(Helper &&helper, const ColumnNames_t &columns={})
Book execution of a custom action using a user-defined helper object.
RLoopManager * fLoopManager
Definition: RInterface.hxx:119
RResultPtr< RDisplay > Display(const ColumnNames_t &columnList, int nRows=5, size_t nMaxCollectionElements=10)
Provides a representation of the columns in the dataset.
friend class RDFInternal::GraphDrawing::GraphCreatorHelper
Definition: RInterface.hxx:110
RResultPtr<::TH2D > Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a weighted two-dimensional histogram (lazy action).
RInterface & operator=(const RInterface &)=default
Copy-assignment operator for RInterface.
RDFInternal::RColumnRegister fColRegister
Contains the columns defined up to this node.
Definition: RInterface.hxx:124
RResultPtr< RDFDetail::SumReturnType_t< T > > Sum(std::string_view columnName="", const RDFDetail::SumReturnType_t< T > &initValue=RDFDetail::SumReturnType_t< T >{})
Return the sum of processed column values (lazy action).
RInterface< Proxied, DS_t > Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName="")
Register systematic variations for an existing column.
Definition: RInterface.hxx:730
RResultPtr< ULong64_t > Count()
Return the number of entries processed (lazy action).
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, std::string_view expression, const std::vector< std::string > &variationTags, std::string_view variationName)
Register systematic variations for one or more existing columns.
Definition: RInterface.hxx:918
RInterface< Proxied, DS_t > Define(std::string_view name, std::string_view expression)
Define a new column.
Definition: RInterface.hxx:467
std::shared_ptr< Proxied > fProxiedPtr
Smart pointer to the graph node encapsulated by this RInterface.
Definition: RInterface.hxx:117
RResultPtr<::TH1D > Histo1D(std::string_view vName)
Fill and return a one-dimensional histogram with the values of a column (lazy action).
ColumnNames_t GetColumnNames()
Returns the names of the available columns.
RInterface< Proxied, DS_t > Vary(const std::vector< std::string > &colNames, F &&expression, const ColumnNames_t &inputColumns, const std::vector< std::string > &variationTags, std::string_view variationName)
Register a systematic variation that affects multiple columns simultaneously.
Definition: RInterface.hxx:775
RInterface< Proxied, DS_t > RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:537
RResultPtr<::TH1D > Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
Fill and return a one-dimensional histogram with the weighted values of a column (lazy action).
RInterface< RLoopManager > CacheImpl(const ColumnNames_t &columnList, std::index_sequence< S... >)
Implementation of cache.
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int end)
Creates a node that filters entries based on range.
RResultPtr< COLL > Take(std::string_view column="")
Return a collection of values of a column (lazy action, returns a std::vector by default).
RInterface< RLoopManager > Cache(std::initializer_list< std::string > columnList)
Save selected columns in memory.
void CheckAndFillDSColumns(ColumnNames_t validCols, TTraits::TypeList< ColumnTypes... > typeList)
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a two-dimensional profile (lazy action).
const std::shared_ptr< Proxied > & GetProxiedPtr() const
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name="", std::string_view v2Name="", std::string_view v3Name="")
Fill and return a three-dimensional histogram (lazy action).
RResultPtr< std::decay_t< T > > Fill(T &&model, const ColumnNames_t &columnList)
Return an object of type T on which T::Fill will be called once per event (lazy action).
std::enable_if_t<!IsFStringConv &&!IsRetTypeDefConstr, RInterface< Proxied, DS_t > > DefineImpl(std::string_view, F, const ColumnNames_t &)
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
void SanityChecksForVary(const std::vector< std::string > &colNames, const std::vector< std::string > &variationTags, std::string_view variationName)
RResultPtr< RCutFlowReport > Report()
Gather filtering statistics.
RInterface< Proxied, DS_t > RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns={})
Overwrite the value and/or type of an existing column.
Definition: RInterface.hxx:518
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a two-dimensional profile (lazy action).
RResultPtr<::TGraphAsymmErrors > GraphAsymmErrors(std::string_view x="", std::string_view y="", std::string_view exl="", std::string_view exh="", std::string_view eyl="", std::string_view eyh="")
Fill and return a TGraphAsymmErrors object (lazy action).
RResultPtr< RInterface< RLoopManager > > Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, const RSnapshotOptions &options=RSnapshotOptions())
Save selected columns to disk, in a new TTree treename in file filename.
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName="")
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface< Proxied, DS_t > DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns={})
Define a new column with a value dependent on the processing slot and the current entry.
Definition: RInterface.hxx:449
RResultPtr< RDFDetail::MinReturnType_t< T > > Min(std::string_view columnName="")
Return the minimum of processed column values (lazy action).
RResultPtr< T > Reduce(F f, std::string_view columnName="")
Execute a user-defined reduce operation on the values of a column.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
RInterface< RDFDetail::RJittedFilter, DS_t > Filter(std::string_view expression, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:346
RResultPtr<::TProfile2D > Profile2D(const TProfile2DModel &model)
Fill and return a two-dimensional profile (lazy action).
std::string GetColumnType(std::string_view column)
Return the type of a given column as a string.
ColumnNames_t GetDefinedColumnNames()
Returns the names of the defined columns.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, const ColumnNames_t &columns={}, std::string_view name="")
Append a filter to the call graph.
Definition: RInterface.hxx:287
RResultPtr< U > Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
Execute a user-defined accumulation operation on the processed column values in each processing slot.
RInterface(RInterface &&)=default
Move-ctor for RInterface.
RResultPtr< T > Reduce(F f, std::string_view columnName, const T &redIdentity)
Execute a user-defined reduce operation on the values of a column.
void CheckIMTDisabled(std::string_view callerName)
unsigned int GetNRuns() const
Gets the number of event loops run.
RResultPtr<::TH3D > Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view v3Name, std::string_view wName)
Fill and return a three-dimensional histogram (lazy action).
bool HasColumn(std::string_view columnName)
Checks if a column is present in the dataset.
RInterface< RDFDetail::RFilter< F, Proxied >, DS_t > Filter(F f, std::string_view name)
Append a filter to the call graph.
Definition: RInterface.hxx:310
RInterface< RDFDetail::RRange< Proxied >, DS_t > Range(unsigned int begin, unsigned int end, unsigned int stride=1)
Creates a node that filters entries based on range: [begin, end).
std::vector< std::string > GetColumnTypeNamesList(const ColumnNames_t &columnList)
std::vector< std::string > GetFilterNames()
Returns the names of the filters created.
RResultPtr<::TH1D > Histo1D(const TH1DModel &model={"", "", 128u, 0., 0.}, std::string_view vName="")
Fill and return a one-dimensional histogram with the values of a column (lazy action).
RResultPtr<::TProfile > Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
Fill and return a one-dimensional profile (lazy action).
RInterface(const std::shared_ptr< Proxied > &proxied, RLoopManager &lm, const RDFInternal::RColumnRegister &columns, RDataSource *ds)
RResultPtr<::TH3D > Histo3D(const TH3DModel &model)
RDFDescription Describe()
Return information about the dataframe.
RVariationsDescription GetVariations() const
Return a descriptor for the systematic variations registered in this branch of the computation graph.
RResultPtr< RDFDetail::MaxReturnType_t< T > > Max(std::string_view columnName="")
Return the maximum of processed column values (lazy action).
A RDataSource implementation which is built on top of result proxies.
Definition: RLazyDSImpl.hxx:41
Smart pointer for the return type of actions.
Definition: RResultPtr.hxx:103
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
Definition: RSampleInfo.hxx:32
A descriptor for the systematic variations known to a given RDataFrame node.
ROOT's RDataFrame offers a modern, high-level interface for analysis of data stored in TTree ,...
Definition: RDataFrame.hxx:40
A chain is a collection of files containing TTree objects.
Definition: TChain.h:33
TDirectory::TContext keeps track of and restores the current directory.
Definition: TDirectory.h:89
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
RFriendInfo GetFriendInfo(const TTree &tree)
Get and store the names, aliases and file names of the direct friends of the tree.
std::vector< std::string > GetFileNamesFromTree(const TTree &tree)
Get and store the file names associated with the input tree.
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
#define F(x, y, z)
RResultPtr< T > MakeResultPtr(const std::shared_ptr< T > &r, RLoopManager &df, std::shared_ptr< ROOT::Internal::RDF::RActionBase > actionPtr)
Create an RResultPtr and set its pointer to the corresponding RAction. This overload is invoked by non-...
Definition: RResultPtr.hxx:430
ParsedTreePath ParseTreePath(std::string_view fullTreeName)
std::shared_ptr< RJittedVariation > BookVariationJit(const std::vector< std::string > &colNames, std::string_view variationName, const std::vector< std::string > &variationTags, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &colRegister, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Vary call.
const std::type_info & TypeName2TypeID(const std::string &name)
Return the type_info associated to a name.
Definition: RDFUtils.cxx:51
std::vector< std::string > GetBranchNames(TTree &t, bool allowDuplicates=true)
Get all the branch names, including the ones of the friend trees.
void CheckValidCppVarName(std::string_view var, const std::string &where)
std::string ColumnName2ColumnTypeName(const std::string &colName, TTree *, RDataSource *, RDefineBase *, bool vector2rvec=true)
Return a string containing the type of the given branch.
Definition: RDFUtils.cxx:224
std::shared_ptr< RNodeBase > UpcastNode(std::shared_ptr< RNodeBase > ptr)
std::string TypeID2TypeName(const std::type_info &id)
Returns the name of a type starting from its type_info. An empty string is returned in case of failure...
Definition: RDFUtils.cxx:99
ColumnNames_t GetValidatedColumnNames(RLoopManager &lm, const unsigned int nColumns, const ColumnNames_t &columns, const RColumnRegister &customColumns, RDataSource *ds)
Given the desired number of columns and the user-provided list of columns:
std::vector< std::string > GetFilterNames(const std::shared_ptr< RLoopManager > &loopManager)
unsigned int GetColumnWidth(const std::vector< std::string > &names, const unsigned int minColumnSpace=8u)
Get optimal column width for printing a table given the names and the desired minimal space between c...
Definition: RDFUtils.cxx:374
std::string PrettyPrintAddr(const void *const addr)
std::string JitBuildAction(const ColumnNames_t &cols, std::shared_ptr< RDFDetail::RNodeBase > *prevNode, const std::type_info &helperArgType, const std::type_info &at, void *helperArgOnHeap, TTree *tree, const unsigned int nSlots, const RColumnRegister &customCols, RDataSource *ds, std::weak_ptr< RJittedAction > *jittedActionOnHeap)
ColumnNames_t GetTopLevelBranchNames(TTree &t)
Get all the top-level branch names, including the ones of the friend trees.
void CheckTypesAndPars(unsigned int nTemplateParams, unsigned int nColumnNames)
bool AtLeastOneEmptyString(const std::vector< std::string_view > strings)
void CheckForDefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is not already there.
bool IsInternalColumn(std::string_view colName)
Whether custom column with name colName is an "internal" column such as rdfentry_ or rdfslot_.
Definition: RDFUtils.cxx:365
ColumnNames_t FilterArraySizeColNames(const ColumnNames_t &columnNames, const std::string &action)
Take a list of column names, return that list with entries starting by '#' filtered out.
std::shared_ptr< RJittedDefine > BookDefinePerSampleJit(std::string_view name, std::string_view expression, RLoopManager &lm, const RColumnRegister &customCols, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a DefinePerSample call.
void CheckForRedefinition(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols, const ColumnNames_t &treeColumns, const ColumnNames_t &dataSourceColumns)
Throw if column definedColView is already there.
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
ColumnNames_t ConvertRegexToColumns(const ColumnNames_t &colNames, std::string_view columnNameRegexp, std::string_view callerName)
std::shared_ptr< RJittedDefine > BookDefineJit(std::string_view name, std::string_view expression, RLoopManager &lm, RDataSource *ds, const RColumnRegister &customCols, const ColumnNames_t &branches, std::shared_ptr< RNodeBase > *upcastNodeOnHeap)
Book the jitting of a Define call.
std::vector< std::string > GetValidatedArgTypes(const ColumnNames_t &colNames, const RColumnRegister &colRegister, TTree *tree, RDataSource *ds, const std::string &context, bool vector2rvec)
void CheckForNoVariations(const std::string &where, std::string_view definedColView, const RColumnRegister &customCols)
Throw if the column has systematic variations attached.
void TriggerRun(ROOT::RDF::RNode &node)
Trigger the execution of an RDataFrame computation graph.
std::shared_ptr< RDFDetail::RJittedFilter > BookFilterJit(std::shared_ptr< RDFDetail::RNodeBase > *prevNodeOnHeap, std::string_view name, std::string_view expression, const ColumnNames_t &branches, const RColumnRegister &customCols, TTree *tree, RDataSource *ds)
Book the jitting of a Filter call.
double T(double x)
Definition: ChebyshevPol.h:34
std::vector< std::string > ColumnNames_t
Definition: Utils.hxx:35
RInterface<::ROOT::Detail::RDF::RNodeBase, void > RNode
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
This file contains a specialised ROOT message handler to test for diagnostics in unit tests.
void EnableImplicitMT(UInt_t numthreads=0)
Enable ROOT's implicit multi-threading for all objects and methods that provide an internal paralleli...
Definition: TROOT.cxx:527
Bool_t IsImplicitMTEnabled()
Returns true if the implicit multi-threading in ROOT is enabled.
Definition: TROOT.cxx:558
void DisableImplicitMT()
Disables the implicit multi-threading in ROOT (see EnableImplicitMT).
Definition: TROOT.cxx:544
std::pair< Double_t, Double_t > Range_t
Definition: TGLUtil.h:1196
RooArgSet S(Args_t &&... args)
Definition: RooArgSet.h:241
char * DemangleTypeIdName(const std::type_info &ti, int &errorCode)
Demangle in a portable way the type id name.
static constexpr double s
Definition: graph.py:1
Definition: tree.py:1
type is TypeList if MustRemove is false, otherwise it is a TypeList with the first type removed
Definition: Utils.hxx:139
A collection of options to steer the creation of the dataset on file.
bool fLazy
Do not start the event loop when Snapshot is called.
A struct which stores the parameters of a TH1D.
Definition: HistoModels.hxx:30
std::shared_ptr<::TH1D > GetHistogram() const
A struct which stores the parameters of a TH2D.
Definition: HistoModels.hxx:48
std::shared_ptr<::TH2D > GetHistogram() const
A struct which stores the parameters of a TH3D.
Definition: HistoModels.hxx:73
std::shared_ptr<::TH3D > GetHistogram() const
A struct which stores the parameters of a THnD.
std::shared_ptr<::THnD > GetHistogram() const
A struct which stores the parameters of a TProfile.
std::shared_ptr<::TProfile > GetProfile() const
A struct which stores the parameters of a TProfile2D.
std::shared_ptr<::TProfile2D > GetProfile() const
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25