Logo ROOT  
Reference Guide
ActionHelpers.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RDF/ActionHelpers.hxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \date 2020-06
9*/
10
11/*************************************************************************
12 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
13 * All rights reserved. *
14 * *
15 * For the licensing terms see $ROOTSYS/LICENSE. *
16 * For the list of contributors see $ROOTSYS/README/CREDITS. *
17 *************************************************************************/
18
19#ifndef ROOT_RDFOPERATIONS
20#define ROOT_RDFOPERATIONS
21
22#include "Compression.h"
23#include "ROOT/RStringView.hxx"
24#include "ROOT/RVec.hxx"
25#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
28#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/TypeTraits.hxx"
31#include "ROOT/RDF/RDisplay.hxx"
32#include "RtypesCore.h"
33#include "TBranch.h"
34#include "TClassEdit.h"
35#include "TClassRef.h"
36#include "TDirectory.h"
37#include "TError.h" // for R__ASSERT, Warning
38#include "TFile.h" // for SnapshotHelper
39#include "TH1.h"
40#include "TGraph.h"
41#include "TGraphAsymmErrors.h"
42#include "TLeaf.h"
43#include "TObject.h"
44#include "TTree.h"
45#include "TTreeReader.h" // for SnapshotHelper
46#include "TStatistic.h"
49
50#include <algorithm>
51#include <functional>
52#include <limits>
53#include <memory>
54#include <stdexcept>
55#include <string>
56#include <type_traits>
57#include <utility> // std::index_sequence
58#include <vector>
59#include <iomanip>
60#include <numeric> // std::accumulate in MeanHelper
61
62/// \cond HIDDEN_SYMBOLS
63
64namespace ROOT {
65namespace Internal {
66namespace RDF {
67using namespace ROOT::TypeTraits;
68using namespace ROOT::VecOps;
69using namespace ROOT::RDF;
70using namespace ROOT::Detail::RDF;
71
72using Hist_t = ::TH1D;
73
74class RBranchSet {
75 std::vector<TBranch *> fBranches;
76 std::vector<std::string> fNames;
77
78public:
79 TBranch *Get(const std::string &name) const
80 {
81 auto it = std::find(fNames.begin(), fNames.end(), name);
82 if (it == fNames.end())
83 return nullptr;
84 return fBranches[std::distance(fNames.begin(), it)];
85 }
86
87 void Insert(const std::string &name, TBranch *address)
88 {
89 if (address == nullptr) {
90 throw std::logic_error("Trying to insert a null branch address.");
91 }
92 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
93 throw std::logic_error("Trying to insert a branch address that's already present.");
94 }
95 if (std::find(fNames.begin(), fNames.end(), name) != fNames.end()) {
96 throw std::logic_error("Trying to insert a branch name that's already present.");
97 }
98 fNames.emplace_back(name);
99 fBranches.emplace_back(address);
100 }
101
102 void Clear()
103 {
104 fBranches.clear();
105 fNames.clear();
106 }
107
108 void AssertNoNullBranchAddresses()
109 {
110 std::vector<TBranch *> branchesWithNullAddress;
111 std::copy_if(fBranches.begin(), fBranches.end(), std::back_inserter(branchesWithNullAddress),
112 [](TBranch *b) { return b->GetAddress() == nullptr; });
113
114 if (branchesWithNullAddress.empty())
115 return;
116
117 // otherwise build error message and throw
118 std::vector<std::string> missingBranchNames;
119 std::transform(branchesWithNullAddress.begin(), branchesWithNullAddress.end(),
120 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
121 std::string msg = "RDataFrame::Snapshot:";
122 if (missingBranchNames.size() == 1) {
123 msg += " branch " + missingBranchNames[0] +
124 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
125 "it is";
126 } else {
127 msg += " branches ";
128 for (const auto &bName : missingBranchNames)
129 msg += bName + ", ";
130 msg.resize(msg.size() - 2); // remove last ", "
131 msg +=
132 " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
133 }
134 msg += " not part of the set of branches that are being written out.";
135 throw std::runtime_error(msg);
136 }
137};
138
/// The container type for each thread's partial result in an action helper
// std::vector<bool> must never be instantiated here: it would make it impossible to return a reference to one
// of the thread-local results, so std::deque is used for bool instead. Having one common alias for the container
// type also makes it easy to swap the underlying container, e.g. if false sharing of the thread-local results
// turns out to be a problem.
template <typename T>
using Results = typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
145
146template <typename F>
147class R__CLING_PTRCHECK(off) ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
148 F fCallable;
149
150public:
152 ForeachSlotHelper(F &&f) : fCallable(f) {}
153 ForeachSlotHelper(ForeachSlotHelper &&) = default;
154 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
155
156 void InitTask(TTreeReader *, unsigned int) {}
157
158 template <typename... Args>
159 void Exec(unsigned int slot, Args &&... args)
160 {
161 // check that the decayed types of Args are the same as the branch types
162 static_assert(std::is_same<TypeList<std::decay_t<Args>...>, ColumnTypes_t>::value, "");
163 fCallable(slot, std::forward<Args>(args)...);
164 }
165
166 void Initialize() { /* noop */}
167
168 void Finalize() { /* noop */}
169
170 std::string GetActionName() { return "ForeachSlot"; }
171};
172
173class R__CLING_PTRCHECK(off) CountHelper : public RActionImpl<CountHelper> {
174 std::shared_ptr<ULong64_t> fResultCount;
175 Results<ULong64_t> fCounts;
176
177public:
178 using ColumnTypes_t = TypeList<>;
179 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
180 CountHelper(CountHelper &&) = default;
181 CountHelper(const CountHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot);
184 void Initialize() { /* noop */}
185 void Finalize();
186
187 // Helper functions for RMergeableValue
188 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
189 {
190 return std::make_unique<RMergeableCount>(*fResultCount);
191 }
192
193 ULong64_t &PartialUpdate(unsigned int slot);
194
195 std::string GetActionName() { return "Count"; }
196
197 CountHelper MakeNew(void *newResult)
198 {
199 auto &result = *static_cast<std::shared_ptr<ULong64_t> *>(newResult);
200 return CountHelper(result, fCounts.size());
201 }
202};
203
204template <typename RNode_t>
205class R__CLING_PTRCHECK(off) ReportHelper : public RActionImpl<ReportHelper<RNode_t>> {
206 std::shared_ptr<RCutFlowReport> fReport;
207 /// Non-owning pointer, never null. As usual, the node is owned by its children nodes (and therefore indirectly by
208 /// the RAction corresponding to this action helper).
209 RNode_t *fNode;
210 bool fReturnEmptyReport;
211
212public:
213 using ColumnTypes_t = TypeList<>;
214 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, RNode_t *node, bool emptyRep)
215 : fReport(report), fNode(node), fReturnEmptyReport(emptyRep){};
216 ReportHelper(ReportHelper &&) = default;
217 ReportHelper(const ReportHelper &) = delete;
218 void InitTask(TTreeReader *, unsigned int) {}
219 void Exec(unsigned int /* slot */) {}
220 void Initialize() { /* noop */}
221 void Finalize()
222 {
223 if (!fReturnEmptyReport)
224 fNode->Report(*fReport);
225 }
226
227 std::string GetActionName() { return "Report"; }
228
229 // TODO implement MakeNew. Requires some smartness in passing the appropriate previous node.
230};
231
232/// This helper fills TH1Ds for which no axes were specified by buffering the fill values to pick good axes limits.
233///
234/// TH1Ds have an automatic mechanism to pick good limits based on the first N entries they were filled with, but
235/// that does not work in multi-thread event loops as it might yield histograms with incompatible binning in each
236/// thread, making it impossible to merge the per-thread results.
237/// Instead, this helper delays the decision on the axes limits until all threads have done processing, synchronizing
238/// the decision on the limits as part of the merge operation.
239class R__CLING_PTRCHECK(off) BufferedFillHelper : public RActionImpl<BufferedFillHelper> {
240 // this sets a total initial size of 16 MB for the buffers (can increase)
241 static constexpr unsigned int fgTotalBufSize = 2097152;
242 using BufEl_t = double;
243 using Buf_t = std::vector<BufEl_t>;
244
245 std::vector<Buf_t> fBuffers;
246 std::vector<Buf_t> fWBuffers;
247 std::shared_ptr<Hist_t> fResultHist;
248 unsigned int fNSlots;
249 unsigned int fBufSize;
250 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
251 Results<std::unique_ptr<Hist_t>> fPartialHists;
252 Buf_t fMin;
253 Buf_t fMax;
254
255 void UpdateMinMax(unsigned int slot, double v);
256
257public:
258 BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
259 BufferedFillHelper(BufferedFillHelper &&) = default;
260 BufferedFillHelper(const BufferedFillHelper &) = delete;
261 void InitTask(TTreeReader *, unsigned int) {}
262 void Exec(unsigned int slot, double v);
263 void Exec(unsigned int slot, double v, double w);
264
266 void Exec(unsigned int slot, const T &vs)
267 {
268 auto &thisBuf = fBuffers[slot];
269 // range-based for results in warnings on some compilers due to vector<bool>'s custom reference type
270 for (auto v = vs.begin(); v != vs.end(); ++v) {
271 UpdateMinMax(slot, *v);
272 thisBuf.emplace_back(*v); // TODO: Can be optimised in case T == BufEl_t
273 }
274 }
275
277 void Exec(unsigned int slot, const T &vs, const W &ws)
278 {
279 auto &thisBuf = fBuffers[slot];
280
281 for (auto &v : vs) {
282 UpdateMinMax(slot, v);
283 thisBuf.emplace_back(v);
284 }
285
286 auto &thisWBuf = fWBuffers[slot];
287 for (auto &w : ws) {
288 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
289 }
290 }
291
293 void Exec(unsigned int slot, const T &vs, const W w)
294 {
295 auto &thisBuf = fBuffers[slot];
296 for (auto &v : vs) {
297 UpdateMinMax(slot, v);
298 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
299 }
300
301 auto &thisWBuf = fWBuffers[slot];
302 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
303 }
304
305 // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
307 void Exec(unsigned int, const T &, const W &)
308 {
309 throw std::runtime_error(
310 "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
311 }
312
313 Hist_t &PartialUpdate(unsigned int);
314
315 void Initialize() { /* noop */}
316
317 void Finalize();
318
319 // Helper functions for RMergeableValue
320 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
321 {
322 return std::make_unique<RMergeableFill<Hist_t>>(*fResultHist);
323 }
324
325 std::string GetActionName()
326 {
327 return std::string(fResultHist->IsA()->GetName()) + "\\n" + std::string(fResultHist->GetName());
328 }
329
330 BufferedFillHelper MakeNew(void *newResult)
331 {
332 auto &result = *static_cast<std::shared_ptr<Hist_t> *>(newResult);
333 result->Reset();
334 result->SetDirectory(nullptr);
335 return BufferedFillHelper(result, fNSlots);
336 }
337};
338
339extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &);
340extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &);
341extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &);
342extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &);
343extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
344extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
345extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
346extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
347extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
348extern template void
349BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
350
351/// The generic Fill helper: it calls Fill on per-thread objects and then Merge to produce a final result.
352/// For one-dimensional histograms, if no axes are specified, RDataFrame uses BufferedFillHelper instead.
353template <typename HIST = Hist_t>
354class R__CLING_PTRCHECK(off) FillHelper : public RActionImpl<FillHelper<HIST>> {
355 std::vector<HIST *> fObjects;
356
357 template <typename H = HIST, typename = decltype(std::declval<H>().Reset())>
358 void ResetIfPossible(H *h)
359 {
360 h->Reset();
361 }
362
363 void ResetIfPossible(TStatistic *h) { *h = TStatistic(); }
364
365 // cannot safely re-initialize variations of the result, hence error out
366 void ResetIfPossible(...)
367 {
368 throw std::runtime_error(
369 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
370 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
371 }
372
373 void UnsetDirectoryIfPossible(TH1 *h) {
374 h->SetDirectory(nullptr);
375 }
376
377 void UnsetDirectoryIfPossible(...) {}
378
379 // Merge overload for types with Merge(TCollection*), like TH1s
381 auto Merge(std::vector<H *> &objs, int /*toincreaseoverloadpriority*/)
382 -> decltype(objs[0]->Merge((TCollection *)nullptr), void())
383 {
384 TList l;
385 for (auto it = ++objs.begin(); it != objs.end(); ++it)
386 l.Add(*it);
387 objs[0]->Merge(&l);
388 }
389
390 // Merge overload for types with Merge(const std::vector&)
391 template <typename H>
392 auto Merge(std::vector<H *> &objs, double /*toloweroverloadpriority*/)
393 -> decltype(objs[0]->Merge(std::vector<HIST *>{}), void())
394 {
395 objs[0]->Merge({++objs.begin(), objs.end()});
396 }
397
398 // Merge overload to error out in case no valid HIST::Merge method was detected
399 template <typename T>
400 void Merge(T, ...)
401 {
402 static_assert(sizeof(T) < 0,
403 "The type passed to Fill does not provide a Merge(TCollection*) or Merge(const std::vector&) method.");
404 }
405
406 // class which wraps a pointer and implements a no-op increment operator
407 template <typename T>
408 class ScalarConstIterator {
409 const T *obj_;
410
411 public:
412 ScalarConstIterator(const T *obj) : obj_(obj) {}
413 const T &operator*() const { return *obj_; }
414 ScalarConstIterator<T> &operator++() { return *this; }
415 };
416
417 // helper functions which provide one implementation for scalar types and another for containers
418 // TODO these could probably all be replaced by inlined lambdas and/or constexpr if statements
419 // in c++17 or later
420
421 // return unchanged value for scalar
423 ScalarConstIterator<T> MakeBegin(const T &val)
424 {
425 return ScalarConstIterator<T>(&val);
426 }
427
428 // return iterator to beginning of container
430 auto MakeBegin(const T &val)
431 {
432 return std::begin(val);
433 }
434
435 // return 1 for scalars
437 std::size_t GetSize(const T &)
438 {
439 return 1;
440 }
441
442 // return container size
444 std::size_t GetSize(const T &val)
445 {
446#if __cplusplus >= 201703L
447 return std::size(val);
448#else
449 return val.size();
450#endif
451 }
452
453 template <std::size_t ColIdx, typename End_t, typename... Its>
454 void ExecLoop(unsigned int slot, End_t end, Its... its)
455 {
456 auto *thisSlotH = fObjects[slot];
457 // loop increments all of the iterators while leaving scalars unmodified
458 // TODO this could be simplified with fold expressions or std::apply in C++17
459 auto nop = [](auto &&...) {};
460 for (; GetNthElement<ColIdx>(its...) != end; nop(++its...)) {
461 thisSlotH->Fill(*its...);
462 }
463 }
464
465public:
466 FillHelper(FillHelper &&) = default;
467 FillHelper(const FillHelper &) = delete;
468
469 FillHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
470 {
471 fObjects[0] = h.get();
472 // Initialize all other slots
473 for (unsigned int i = 1; i < nSlots; ++i) {
474 fObjects[i] = new HIST(*fObjects[0]);
475 UnsetDirectoryIfPossible(fObjects[i]);
476 }
477 }
478
479 void InitTask(TTreeReader *, unsigned int) {}
480
481 // no container arguments
482 template <typename... ValTypes, std::enable_if_t<!Disjunction<IsDataContainer<ValTypes>...>::value, int> = 0>
483 auto Exec(unsigned int slot, const ValTypes &...x) -> decltype(fObjects[slot]->Fill(x...), void())
484 {
485 fObjects[slot]->Fill(x...);
486 }
487
488 // at least one container argument
489 template <typename... Xs, std::enable_if_t<Disjunction<IsDataContainer<Xs>...>::value, int> = 0>
490 auto Exec(unsigned int slot, const Xs &...xs) -> decltype(fObjects[slot]->Fill(*MakeBegin(xs)...), void())
491 {
492 // array of bools keeping track of which inputs are containers
493 constexpr std::array<bool, sizeof...(Xs)> isContainer{IsDataContainer<Xs>::value...};
494
495 // index of the first container input
496 constexpr std::size_t colidx = FindIdxTrue(isContainer);
497 // if this happens, there is a bug in the implementation
498 static_assert(colidx < sizeof...(Xs), "Error: index of collection-type argument not found.");
499
500 // get the end iterator to the first container
501 auto const xrefend = std::end(GetNthElement<colidx>(xs...));
502
503 // array of container sizes (1 for scalars)
504 std::array<std::size_t, sizeof...(xs)> sizes = {{GetSize(xs)...}};
505
506 for (std::size_t i = 0; i < sizeof...(xs); ++i) {
507 if (isContainer[i] && sizes[i] != sizes[colidx]) {
508 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
509 }
510 }
511
512 ExecLoop<colidx>(slot, xrefend, MakeBegin(xs)...);
513 }
514
515 template <typename T = HIST>
516 void Exec(...)
517 {
518 static_assert(sizeof(T) < 0,
519 "When filling an object with RDataFrame (e.g. via a Fill action) the number or types of the "
520 "columns passed did not match the signature of the object's `Fill` method.");
521 }
522
523 void Initialize() { /* noop */}
524
525 void Finalize()
526 {
527 if (fObjects.size() == 1)
528 return;
529
530 Merge(fObjects, /*toselectcorrectoverload=*/0);
531
532 // delete the copies we created for the slots other than the first
533 for (auto it = ++fObjects.begin(); it != fObjects.end(); ++it)
534 delete *it;
535 }
536
537 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
538
539 // Helper functions for RMergeableValue
540 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
541 {
542 return std::make_unique<RMergeableFill<HIST>>(*fObjects[0]);
543 }
544
545 // if the fObjects vector type is derived from TObject, return the name of the object
547 std::string GetActionName()
548 {
549 return std::string(fObjects[0]->IsA()->GetName()) + "\\n" + std::string(fObjects[0]->GetName());
550 }
551
552 // if fObjects is not derived from TObject, indicate it is some other object
554 std::string GetActionName()
555 {
556 return "Fill custom object";
557 }
558
559 template <typename H = HIST>
560 FillHelper MakeNew(void *newResult)
561 {
562 auto &result = *static_cast<std::shared_ptr<H> *>(newResult);
563 ResetIfPossible(result.get());
564 UnsetDirectoryIfPossible(result.get());
565 return FillHelper(result, fObjects.size());
566 }
567};
568
569class R__CLING_PTRCHECK(off) FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
570public:
571 using Result_t = ::TGraph;
572
573private:
574 std::vector<::TGraph *> fGraphs;
575
576public:
577 FillTGraphHelper(FillTGraphHelper &&) = default;
578 FillTGraphHelper(const FillTGraphHelper &) = delete;
579
580 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
581 {
582 fGraphs[0] = g.get();
583 // Initialize all other slots
584 for (unsigned int i = 1; i < nSlots; ++i) {
585 fGraphs[i] = new TGraph(*fGraphs[0]);
586 }
587 }
588
589 void Initialize() {}
590 void InitTask(TTreeReader *, unsigned int) {}
591
592 // case: both types are container types
593 template <typename X0, typename X1,
595 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
596 {
597 if (x0s.size() != x1s.size()) {
598 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
599 }
600 auto *thisSlotG = fGraphs[slot];
601 auto x0sIt = std::begin(x0s);
602 const auto x0sEnd = std::end(x0s);
603 auto x1sIt = std::begin(x1s);
604 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
605 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
606 }
607 }
608
609 // case: both types are non-container types, e.g. scalars
610 template <typename X0, typename X1,
612 void Exec(unsigned int slot, X0 x0, X1 x1)
613 {
614 auto thisSlotG = fGraphs[slot];
615 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
616 }
617
618 // case: types are combination of containers and non-containers
619 // this is not supported, error out
620 template <typename X0, typename X1, typename... ExtraArgsToLowerPriority>
621 void Exec(unsigned int, X0, X1, ExtraArgsToLowerPriority...)
622 {
623 throw std::runtime_error("Graph was applied to a mix of scalar values and collections. This is not supported.");
624 }
625
626 void Finalize()
627 {
628 const auto nSlots = fGraphs.size();
629 auto resGraph = fGraphs[0];
630 TList l;
631 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
632 for (unsigned int slot = 1; slot < nSlots; ++slot) {
633 l.Add(fGraphs[slot]);
634 }
635 resGraph->Merge(&l);
636 }
637
638 // Helper functions for RMergeableValue
639 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
640 {
641 return std::make_unique<RMergeableFill<Result_t>>(*fGraphs[0]);
642 }
643
644 std::string GetActionName() { return "Graph"; }
645
646 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
647
648 FillTGraphHelper MakeNew(void *newResult)
649 {
650 auto &result = *static_cast<std::shared_ptr<TGraph> *>(newResult);
651 result->Set(0);
652 return FillTGraphHelper(result, fGraphs.size());
653 }
654};
655
656class R__CLING_PTRCHECK(off) FillTGraphAsymmErrorsHelper
657 : public ROOT::Detail::RDF::RActionImpl<FillTGraphAsymmErrorsHelper> {
658public:
659 using Result_t = ::TGraphAsymmErrors;
660
661private:
662 std::vector<::TGraphAsymmErrors *> fGraphAsymmErrors;
663
664public:
665 FillTGraphAsymmErrorsHelper(FillTGraphAsymmErrorsHelper &&) = default;
666 FillTGraphAsymmErrorsHelper(const FillTGraphAsymmErrorsHelper &) = delete;
667
668 FillTGraphAsymmErrorsHelper(const std::shared_ptr<::TGraphAsymmErrors> &g, const unsigned int nSlots)
669 : fGraphAsymmErrors(nSlots, nullptr)
670 {
671 fGraphAsymmErrors[0] = g.get();
672 // Initialize all other slots
673 for (unsigned int i = 1; i < nSlots; ++i) {
674 fGraphAsymmErrors[i] = new TGraphAsymmErrors(*fGraphAsymmErrors[0]);
675 }
676 }
677
678 void Initialize() {}
679 void InitTask(TTreeReader *, unsigned int) {}
680
681 // case: all types are container types
682 template <
683 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
686 int> = 0>
687 void
688 Exec(unsigned int slot, const X &xs, const Y &ys, const EXL &exls, const EXH &exhs, const EYL &eyls, const EYH &eyhs)
689 {
690 if ((xs.size() != ys.size()) || (xs.size() != exls.size()) || (xs.size() != exhs.size()) ||
691 (xs.size() != eyls.size()) || (xs.size() != eyhs.size())) {
692 throw std::runtime_error("Cannot fill GraphAsymmErrors with values in containers of different sizes.");
693 }
694 auto *thisSlotG = fGraphAsymmErrors[slot];
695 auto xsIt = std::begin(xs);
696 auto ysIt = std::begin(ys);
697 auto exlsIt = std::begin(exls);
698 auto exhsIt = std::begin(exhs);
699 auto eylsIt = std::begin(eyls);
700 auto eyhsIt = std::begin(eyhs);
701 while (xsIt != std::end(xs)) {
702 const auto n = thisSlotG->GetN(); // must use the same `n` for SetPoint and SetPointError
703 thisSlotG->SetPoint(n, *xsIt++, *ysIt++);
704 thisSlotG->SetPointError(n, *exlsIt++, *exhsIt++, *eylsIt++, *eyhsIt++);
705 }
706 }
707
708 // case: all types are non-container types, e.g. scalars
709 template <
710 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
713 int> = 0>
714 void Exec(unsigned int slot, X x, Y y, EXL exl, EXH exh, EYL eyl, EYH eyh)
715 {
716 auto thisSlotG = fGraphAsymmErrors[slot];
717 const auto n = thisSlotG->GetN();
718 thisSlotG->SetPoint(n, x, y);
719 thisSlotG->SetPointError(n, exl, exh, eyl, eyh);
720 }
721
722 // case: types are combination of containers and non-containers
723 // this is not supported, error out
724 template <typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
725 typename... ExtraArgsToLowerPriority>
726 void Exec(unsigned int, X, Y, EXL, EXH, EYL, EYH, ExtraArgsToLowerPriority...)
727 {
728 throw std::runtime_error(
729 "GraphAsymmErrors was applied to a mix of scalar values and collections. This is not supported.");
730 }
731
732 void Finalize()
733 {
734 const auto nSlots = fGraphAsymmErrors.size();
735 auto resGraphAsymmErrors = fGraphAsymmErrors[0];
736 TList l;
737 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
738 for (unsigned int slot = 1; slot < nSlots; ++slot) {
739 l.Add(fGraphAsymmErrors[slot]);
740 }
741 resGraphAsymmErrors->Merge(&l);
742 }
743
744 // Helper functions for RMergeableValue
745 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
746 {
747 return std::make_unique<RMergeableFill<Result_t>>(*fGraphAsymmErrors[0]);
748 }
749
750 std::string GetActionName() { return "GraphAsymmErrors"; }
751
752 Result_t &PartialUpdate(unsigned int slot) { return *fGraphAsymmErrors[slot]; }
753
754 FillTGraphAsymmErrorsHelper MakeNew(void *newResult)
755 {
756 auto &result = *static_cast<std::shared_ptr<TGraphAsymmErrors> *>(newResult);
757 result->Set(0);
758 return FillTGraphAsymmErrorsHelper(result, fGraphAsymmErrors.size());
759 }
760};
761
762// In case of the take helper we have 4 cases:
763// 1. The column is not an RVec, the collection is not a vector
764// 2. The column is not an RVec, the collection is a vector
765// 3. The column is an RVec, the collection is not a vector
766// 4. The column is an RVec, the collection is a vector
767
/// Append `v` to the collection `c`, constructing the new element in place.
/// `v` is a forwarding reference and is forwarded as such: lvalues are copied into the collection,
/// rvalues are moved (so move-only element types are supported as well).
template <typename V, typename COLL>
void FillColl(V&& v, COLL& c) {
   c.emplace_back(std::forward<V>(v));
}

// Use push_back for bool since some compilers do not support emplace_back.
/// Overload for plain bool values: appends `v` to `c` via push_back.
template <typename COLL>
void FillColl(bool v, COLL& c) {
   c.push_back(v);
}
778
779// Case 1.: The column is not an RVec, the collection is not a vector
780// No optimisations, no transformations: just copies.
781template <typename RealT_t, typename T, typename COLL>
782class R__CLING_PTRCHECK(off) TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
783 Results<std::shared_ptr<COLL>> fColls;
784
785public:
786 using ColumnTypes_t = TypeList<T>;
787 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
788 {
789 fColls.emplace_back(resultColl);
790 for (unsigned int i = 1; i < nSlots; ++i)
791 fColls.emplace_back(std::make_shared<COLL>());
792 }
793 TakeHelper(TakeHelper &&);
794 TakeHelper(const TakeHelper &) = delete;
795
796 void InitTask(TTreeReader *, unsigned int) {}
797
798 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
799
800 void Initialize() { /* noop */}
801
802 void Finalize()
803 {
804 auto rColl = fColls[0];
805 for (unsigned int i = 1; i < fColls.size(); ++i) {
806 const auto &coll = fColls[i];
807 const auto end = coll->end();
808 // Use an explicit loop here to prevent compiler warnings introduced by
809 // clang's range-based loop analysis and vector<bool> references.
810 for (auto j = coll->begin(); j != end; j++) {
811 FillColl(*j, *rColl);
812 }
813 }
814 }
815
816 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
817
818 std::string GetActionName() { return "Take"; }
819
820 TakeHelper MakeNew(void *newResult)
821 {
822 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
823 result->clear();
824 return TakeHelper(result, fColls.size());
825 }
826};
827
828// Case 2.: The column is not an RVec, the collection is a vector
829// Optimisations, no transformations: just copies.
830template <typename RealT_t, typename T>
831class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, T, std::vector<T>>
832 : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
833 Results<std::shared_ptr<std::vector<T>>> fColls;
834
835public:
836 using ColumnTypes_t = TypeList<T>;
837 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
838 {
839 fColls.emplace_back(resultColl);
840 for (unsigned int i = 1; i < nSlots; ++i) {
841 auto v = std::make_shared<std::vector<T>>();
842 v->reserve(1024);
843 fColls.emplace_back(v);
844 }
845 }
846 TakeHelper(TakeHelper &&);
847 TakeHelper(const TakeHelper &) = delete;
848
849 void InitTask(TTreeReader *, unsigned int) {}
850
851 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
852
853 void Initialize() { /* noop */}
854
855 // This is optimised to treat vectors
856 void Finalize()
857 {
858 ULong64_t totSize = 0;
859 for (auto &coll : fColls)
860 totSize += coll->size();
861 auto rColl = fColls[0];
862 rColl->reserve(totSize);
863 for (unsigned int i = 1; i < fColls.size(); ++i) {
864 auto &coll = fColls[i];
865 rColl->insert(rColl->end(), coll->begin(), coll->end());
866 }
867 }
868
869 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
870
871 std::string GetActionName() { return "Take"; }
872
873 TakeHelper MakeNew(void *newResult)
874 {
875 auto &result = *static_cast<std::shared_ptr<std::vector<T>> *>(newResult);
876 result->clear();
877 return TakeHelper(result, fColls.size());
878 }
879};
880
881// Case 3.: The column is a RVec, the collection is not a vector
882// No optimisations, transformations from RVecs to vectors
883template <typename RealT_t, typename COLL>
884class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, COLL>
885 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
886 Results<std::shared_ptr<COLL>> fColls;
887
888public:
889 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
890 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
891 {
892 fColls.emplace_back(resultColl);
893 for (unsigned int i = 1; i < nSlots; ++i)
894 fColls.emplace_back(std::make_shared<COLL>());
895 }
896 TakeHelper(TakeHelper &&);
897 TakeHelper(const TakeHelper &) = delete;
898
899 void InitTask(TTreeReader *, unsigned int) {}
900
901 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
902
903 void Initialize() { /* noop */}
904
905 void Finalize()
906 {
907 auto rColl = fColls[0];
908 for (unsigned int i = 1; i < fColls.size(); ++i) {
909 auto &coll = fColls[i];
910 for (auto &v : *coll) {
911 rColl->emplace_back(v);
912 }
913 }
914 }
915
916 std::string GetActionName() { return "Take"; }
917
918 TakeHelper MakeNew(void *newResult)
919 {
920 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
921 result->clear();
922 return TakeHelper(result, fColls.size());
923 }
924};
925
926// Case 4.: The column is an RVec, the collection is a vector
927// Optimisations, transformations from RVecs to vectors
928template <typename RealT_t>
929class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
930 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
931
932 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
933
934public:
935 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
936 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
937 {
938 fColls.emplace_back(resultColl);
939 for (unsigned int i = 1; i < nSlots; ++i) {
940 auto v = std::make_shared<std::vector<RealT_t>>();
941 v->reserve(1024);
942 fColls.emplace_back(v);
943 }
944 }
945 TakeHelper(TakeHelper &&);
946 TakeHelper(const TakeHelper &) = delete;
947
948 void InitTask(TTreeReader *, unsigned int) {}
949
950 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
951
952 void Initialize() { /* noop */}
953
954 // This is optimised to treat vectors
955 void Finalize()
956 {
957 ULong64_t totSize = 0;
958 for (auto &coll : fColls)
959 totSize += coll->size();
960 auto rColl = fColls[0];
961 rColl->reserve(totSize);
962 for (unsigned int i = 1; i < fColls.size(); ++i) {
963 auto &coll = fColls[i];
964 rColl->insert(rColl->end(), coll->begin(), coll->end());
965 }
966 }
967
968 std::string GetActionName() { return "Take"; }
969
970 TakeHelper MakeNew(void *newResult)
971 {
972 auto &result = *static_cast<typename decltype(fColls)::value_type *>(newResult);
973 result->clear();
974 return TakeHelper(result, fColls.size());
975 }
976};
977
978// Extern templates for TakeHelper
979// NOTE: The move-constructor of specializations declared as extern templates
980// must be defined out of line, otherwise cling fails to find its symbol.
981template <typename RealT_t, typename T, typename COLL>
982TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
983template <typename RealT_t, typename T>
984TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
985template <typename RealT_t, typename COLL>
986TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
987template <typename RealT_t>
988TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
989
990// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
991#if __GNUC__ > 5
992extern template class TakeHelper<bool, bool, std::vector<bool>>;
993extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
994extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
995extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
996extern template class TakeHelper<int, int, std::vector<int>>;
997extern template class TakeHelper<long, long, std::vector<long>>;
998extern template class TakeHelper<long long, long long, std::vector<long long>>;
999extern template class TakeHelper<float, float, std::vector<float>>;
1000extern template class TakeHelper<double, double, std::vector<double>>;
1001#endif
1002
1003template <typename ResultType>
1004class R__CLING_PTRCHECK(off) MinHelper : public RActionImpl<MinHelper<ResultType>> {
1005 std::shared_ptr<ResultType> fResultMin;
1006 Results<ResultType> fMins;
1007
1008public:
1009 MinHelper(MinHelper &&) = default;
1010 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
1011 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
1012 {
1013 }
1014
1015 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
1016
1017 void InitTask(TTreeReader *, unsigned int) {}
1018
1020 void Exec(unsigned int slot, const T &vs)
1021 {
1022 for (auto &&v : vs)
1023 fMins[slot] = std::min(static_cast<ResultType>(v), fMins[slot]);
1024 }
1025
1026 void Initialize() { /* noop */}
1027
1028 void Finalize()
1029 {
1030 *fResultMin = std::numeric_limits<ResultType>::max();
1031 for (auto &m : fMins)
1032 *fResultMin = std::min(m, *fResultMin);
1033 }
1034
1035 // Helper functions for RMergeableValue
1036 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1037 {
1038 return std::make_unique<RMergeableMin<ResultType>>(*fResultMin);
1039 }
1040
1041 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
1042
1043 std::string GetActionName() { return "Min"; }
1044
1045 MinHelper MakeNew(void *newResult)
1046 {
1047 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1048 return MinHelper(result, fMins.size());
1049 }
1050};
1051
1052// TODO
1053// extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
1054// extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
1055// extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
1056// extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
1057// extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1058
1059template <typename ResultType>
1060class R__CLING_PTRCHECK(off) MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
1061 std::shared_ptr<ResultType> fResultMax;
1062 Results<ResultType> fMaxs;
1063
1064public:
1065 MaxHelper(MaxHelper &&) = default;
1066 MaxHelper(const MaxHelper &) = delete;
1067 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
1068 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
1069 {
1070 }
1071
1072 void InitTask(TTreeReader *, unsigned int) {}
1073 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
1074
1076 void Exec(unsigned int slot, const T &vs)
1077 {
1078 for (auto &&v : vs)
1079 fMaxs[slot] = std::max(static_cast<ResultType>(v), fMaxs[slot]);
1080 }
1081
1082 void Initialize() { /* noop */}
1083
1084 void Finalize()
1085 {
1086 *fResultMax = std::numeric_limits<ResultType>::lowest();
1087 for (auto &m : fMaxs) {
1088 *fResultMax = std::max(m, *fResultMax);
1089 }
1090 }
1091
1092 // Helper functions for RMergeableValue
1093 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1094 {
1095 return std::make_unique<RMergeableMax<ResultType>>(*fResultMax);
1096 }
1097
1098 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
1099
1100 std::string GetActionName() { return "Max"; }
1101
1102 MaxHelper MakeNew(void *newResult)
1103 {
1104 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1105 return MaxHelper(result, fMaxs.size());
1106 }
1107};
1108
1109// TODO
1110// extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
1111// extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
1112// extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
1113// extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
1114// extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1115
1116template <typename ResultType>
1117class R__CLING_PTRCHECK(off) SumHelper : public RActionImpl<SumHelper<ResultType>> {
1118 std::shared_ptr<ResultType> fResultSum;
1119 Results<ResultType> fSums;
1120 Results<ResultType> fCompensations;
1121
1122 /// Evaluate neutral element for this type and the sum operation.
1123 /// This is assumed to be any_value - any_value if operator- is defined
1124 /// for the type, otherwise a default-constructed ResultType{} is used.
1125 template <typename T = ResultType>
1126 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
1127 {
1128 return v - v;
1129 }
1130
1131 template <typename T = ResultType, typename Dummy = int>
1132 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
1133 {
1134 return ResultType{};
1135 }
1136
1137public:
1138 SumHelper(SumHelper &&) = default;
1139 SumHelper(const SumHelper &) = delete;
1140 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
1141 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1)),
1142 fCompensations(nSlots, NeutralElement(*sumVPtr, -1))
1143 {
1144 }
1145 void InitTask(TTreeReader *, unsigned int) {}
1146
1147 void Exec(unsigned int slot, ResultType x)
1148 {
1149 // Kahan Sum:
1150 ResultType y = x - fCompensations[slot];
1151 ResultType t = fSums[slot] + y;
1152 fCompensations[slot] = (t - fSums[slot]) - y;
1153 fSums[slot] = t;
1154 }
1155
1157 void Exec(unsigned int slot, const T &vs)
1158 {
1159 for (auto &&v : vs) {
1160 Exec(slot, v);
1161 }
1162 }
1163
1164 void Initialize() { /* noop */}
1165
1166 void Finalize()
1167 {
1168 ResultType sum(NeutralElement(ResultType{}, -1));
1169 ResultType compensation(NeutralElement(ResultType{}, -1));
1170 ResultType y(NeutralElement(ResultType{}, -1));
1171 ResultType t(NeutralElement(ResultType{}, -1));
1172 for (auto &m : fSums) {
1173 // Kahan Sum:
1174 y = m - compensation;
1175 t = sum + y;
1176 compensation = (t - sum) - y;
1177 sum = t;
1178 }
1179 *fResultSum += sum;
1180 }
1181
1182 // Helper functions for RMergeableValue
1183 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1184 {
1185 return std::make_unique<RMergeableSum<ResultType>>(*fResultSum);
1186 }
1187
1188 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
1189
1190 std::string GetActionName() { return "Sum"; }
1191
1192 SumHelper MakeNew(void *newResult)
1193 {
1194 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1195 *result = NeutralElement(*result, -1);
1196 return SumHelper(result, fSums.size());
1197 }
1198};
1199
1200class R__CLING_PTRCHECK(off) MeanHelper : public RActionImpl<MeanHelper> {
1201 std::shared_ptr<double> fResultMean;
1202 std::vector<ULong64_t> fCounts;
1203 std::vector<double> fSums;
1204 std::vector<double> fPartialMeans;
1205 std::vector<double> fCompensations;
1206
1207public:
1208 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1209 MeanHelper(MeanHelper &&) = default;
1210 MeanHelper(const MeanHelper &) = delete;
1211 void InitTask(TTreeReader *, unsigned int) {}
1212 void Exec(unsigned int slot, double v);
1213
1215 void Exec(unsigned int slot, const T &vs)
1216 {
1217 for (auto &&v : vs) {
1218
1219 fCounts[slot]++;
1220 // Kahan Sum:
1221 double y = v - fCompensations[slot];
1222 double t = fSums[slot] + y;
1223 fCompensations[slot] = (t - fSums[slot]) - y;
1224 fSums[slot] = t;
1225 }
1226 }
1227
1228 void Initialize() { /* noop */}
1229
1230 void Finalize();
1231
1232 // Helper functions for RMergeableValue
1233 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1234 {
1235 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1236 return std::make_unique<RMergeableMean>(*fResultMean, counts);
1237 }
1238
1239 double &PartialUpdate(unsigned int slot);
1240
1241 std::string GetActionName() { return "Mean"; }
1242
1243 MeanHelper MakeNew(void *newResult)
1244 {
1245 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1246 return MeanHelper(result, fSums.size());
1247 }
1248};
1249
1250extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
1251extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
1252extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
1253extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
1254extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1255
1256class R__CLING_PTRCHECK(off) StdDevHelper : public RActionImpl<StdDevHelper> {
1257 // Number of subsets of data
1258 unsigned int fNSlots;
1259 std::shared_ptr<double> fResultStdDev;
1260 // Number of element for each slot
1261 std::vector<ULong64_t> fCounts;
1262 // Mean of each slot
1263 std::vector<double> fMeans;
1264 // Squared distance from the mean
1265 std::vector<double> fDistancesfromMean;
1266
1267public:
1268 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1269 StdDevHelper(StdDevHelper &&) = default;
1270 StdDevHelper(const StdDevHelper &) = delete;
1271 void InitTask(TTreeReader *, unsigned int) {}
1272 void Exec(unsigned int slot, double v);
1273
1275 void Exec(unsigned int slot, const T &vs)
1276 {
1277 for (auto &&v : vs) {
1278 Exec(slot, v);
1279 }
1280 }
1281
1282 void Initialize() { /* noop */}
1283
1284 void Finalize();
1285
1286 // Helper functions for RMergeableValue
1287 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1288 {
1289 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1290 const Double_t mean =
1291 std::inner_product(fMeans.begin(), fMeans.end(), fCounts.begin(), 0.) / static_cast<Double_t>(counts);
1292 return std::make_unique<RMergeableStdDev>(*fResultStdDev, counts, mean);
1293 }
1294
1295 std::string GetActionName() { return "StdDev"; }
1296
1297 StdDevHelper MakeNew(void *newResult)
1298 {
1299 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1300 return StdDevHelper(result, fCounts.size());
1301 }
1302};
1303
1304extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
1305extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
1306extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
1307extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
1308extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1309
1310template <typename PrevNodeType>
1311class R__CLING_PTRCHECK(off) DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
1312private:
1314 std::shared_ptr<Display_t> fDisplayerHelper;
1315 std::shared_ptr<PrevNodeType> fPrevNode;
1316 size_t fEntriesToProcess;
1317
1318public:
1319 DisplayHelper(size_t nRows, const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
1320 : fDisplayerHelper(d), fPrevNode(prevNode), fEntriesToProcess(nRows)
1321 {
1322 }
1323 DisplayHelper(DisplayHelper &&) = default;
1324 DisplayHelper(const DisplayHelper &) = delete;
1325 void InitTask(TTreeReader *, unsigned int) {}
1326
1327 template <typename... Columns>
1328 void Exec(unsigned int, Columns &... columns)
1329 {
1330 if (fEntriesToProcess == 0)
1331 return;
1332
1333 fDisplayerHelper->AddRow(columns...);
1334 --fEntriesToProcess;
1335
1336 if (fEntriesToProcess == 0) {
1337 // No more entries to process. Send a one-time signal that this node
1338 // of the graph is done. It is important that the 'StopProcessing'
1339 // method is only called once from this helper, otherwise it would seem
1340 // like more than one operation has completed its work.
1341 fPrevNode->StopProcessing();
1342 }
1343 }
1344
1345 void Initialize() {}
1346
1347 void Finalize() {}
1348
1349 std::string GetActionName() { return "Display"; }
1350};
1351
1352template <typename T>
1353void *GetData(ROOT::VecOps::RVec<T> &v)
1354{
1355 return v.data();
1356}
1357
/// Fallback overload for every type that is not an RVec: always returns a null pointer.
template <typename T>
void *GetData(T & /*unused*/)
{
   void *const noData = nullptr;
   return noData;
}
1363
1364template <typename T>
1365void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &name,
1366 TBranch *&branch, void *&branchAddress, T *address, RBranchSet &outputBranches,
1367 bool /*isDefine*/)
1368{
1369 static TClassRef TBOClRef("TBranchObject");
1370
1371 TBranch *inputBranch = nullptr;
1372 if (inputTree) {
1373 inputBranch = inputTree->GetBranch(inName.c_str());
1374 if (!inputBranch) // try harder
1375 inputBranch = inputTree->FindBranch(inName.c_str());
1376 }
1377
1378 auto *outputBranch = outputBranches.Get(name);
1379 if (outputBranch) {
1380 // the output branch was already created, we just need to (re)set its address
1381 if (inputBranch && inputBranch->IsA() == TBOClRef) {
1382 outputBranch->SetAddress(reinterpret_cast<T **>(inputBranch->GetAddress()));
1383 } else if (outputBranch->IsA() != TBranch::Class()) {
1384 branchAddress = address;
1385 outputBranch->SetAddress(&branchAddress);
1386 } else {
1387 outputBranch->SetAddress(address);
1388 branchAddress = address;
1389 }
1390 return;
1391 }
1392
1393 if (inputBranch) {
1394 // Respect the original bufsize and splitlevel arguments
1395 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
1396 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
1397 // TObject branch, see https://bit.ly/2EjLMId ).
1398 const auto bufSize = inputBranch->GetBasketSize();
1399 const auto splitLevel = inputBranch->GetSplitLevel();
1400
1401 if (inputBranch->IsA() == TBOClRef) {
1402 // Need to pass a pointer to pointer
1403 outputBranch =
1404 outputTree.Branch(name.c_str(), reinterpret_cast<T **>(inputBranch->GetAddress()), bufSize, splitLevel);
1405 } else {
1406 outputBranch = outputTree.Branch(name.c_str(), address, bufSize, splitLevel);
1407 }
1408 } else {
1409 outputBranch = outputTree.Branch(name.c_str(), address);
1410 }
1411 outputBranches.Insert(name, outputBranch);
1412 // This is not an array branch, so we don't register the address of the output branch here
1413 branch = nullptr;
1414 branchAddress = nullptr;
1415}
1416
1417/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1418/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1419/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch
1420/// title
1421/// 2. RVecs coming from a custom column or the input file/data-source
1422/// 3. vectors coming from ROOT files that are being read as RVecs
1423/// 4. TClonesArray
1424///
1425/// In case of 1., we keep aside the pointer to the branch and the pointer to the input value (in `branch` and
1426/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
1427template <typename T>
1428void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName,
1429 TBranch *&branch, void *&branchAddress, RVec<T> *ab, RBranchSet &outputBranches, bool isDefine)
1430{
1431 TBranch *inputBranch = nullptr;
1432 if (inputTree) {
1433 inputBranch = inputTree->GetBranch(inName.c_str());
1434 if (!inputBranch) // try harder
1435 inputBranch = inputTree->FindBranch(inName.c_str());
1436 }
1437 auto *outputBranch = outputBranches.Get(outName);
1438
1439 // if no backing input branch, we must write out an RVec
1440 bool mustWriteRVec = (inputBranch == nullptr || isDefine);
1441 // otherwise, if input branch is TClonesArray, must write out an RVec
1442 if (!mustWriteRVec && std::string_view(inputBranch->GetClassName()) == "TClonesArray") {
1443 mustWriteRVec = true;
1444 Warning("Snapshot",
1445 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1446 "be written out as a RVec instead of a TClonesArray. Specify that the type of the branch is "
1447 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.",
1448 inName.c_str());
1449 }
1450 // otherwise, if input branch is a std::vector or RVec, must write out an RVec
1451 if (!mustWriteRVec) {
1452 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
1454 mustWriteRVec = true;
1455 }
1456
1457 if (mustWriteRVec) {
1458 // Treat:
1459 // 2. RVec coming from a custom column or a source
1460 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1461 // 4. TClonesArray written out as RVec<T>
1462 if (outputBranch) {
1463 // needs to be SetObject (not SetAddress) to mimic what happens when this TBranchElement is constructed
1464 outputBranch->SetObject(ab);
1465 } else {
1466 auto *b = outputTree.Branch(outName.c_str(), ab);
1467 outputBranches.Insert(outName, b);
1468 }
1469 return;
1470 }
1471
1472 // else this must be a C-array, aka case 1.
1473 auto dataPtr = ab->data();
1474
1475 if (outputBranch) {
1476 if (outputBranch->IsA() != TBranch::Class()) {
1477 branchAddress = dataPtr;
1478 outputBranch->SetAddress(&branchAddress);
1479 } else {
1480 outputBranch->SetAddress(dataPtr);
1481 }
1482 } else {
1483 // must construct the leaflist for the output branch and create the branch in the output tree
1484 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1485 const auto bname = leaf->GetName();
1486 auto *sizeLeaf = leaf->GetLeafCount();
1487 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
1488
1489 if (sizeLeaf && !outputBranches.Get(sizeLeafName)) {
1490 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
1491 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
1492 // branch to be successful. So we create the size leaf here.
1493 const auto sizeTypeStr = TypeName2ROOTTypeName(sizeLeaf->GetTypeName());
1494 const auto sizeBufSize = sizeLeaf->GetBranch()->GetBasketSize();
1495 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName`
1496 auto *sizeBranch = outputTree.Branch(sizeLeafName.c_str(), (void *)nullptr,
1497 (sizeLeafName + '/' + sizeTypeStr).c_str(), sizeBufSize);
1498 outputBranches.Insert(sizeLeafName, sizeBranch);
1499 }
1500
1501 const auto btype = leaf->GetTypeName();
1502 const auto rootbtype = TypeName2ROOTTypeName(btype);
1503 if (rootbtype == ' ') {
1504 Warning("Snapshot",
1505 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. This "
1506 "column will not be written out.",
1507 bname);
1508 } else {
1509 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
1510 outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1511 outputBranch->SetTitle(inputBranch->GetTitle());
1512 outputBranches.Insert(outName, outputBranch);
1513 branch = outputBranch;
1514 branchAddress = ab->data();
1515 }
1516 }
1517}
1518
1519void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName);
1520
1521/// Helper object for a single-thread Snapshot action
1522template <typename... ColTypes>
1523class R__CLING_PTRCHECK(off) SnapshotHelper : public RActionImpl<SnapshotHelper<ColTypes...>> {
1524 std::string fFileName;
1525 std::string fDirName;
1526 std::string fTreeName;
1527 RSnapshotOptions fOptions;
1528 std::unique_ptr<TFile> fOutputFile;
1529 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1530 bool fBranchAddressesNeedReset{true};
1531 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1532 ColumnNames_t fOutputBranchNames;
1533 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1534 // TODO we might be able to unify fBranches, fBranchAddresses and fOutputBranches
1535 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1536 std::vector<void *> fBranchAddresses; // Addresses of objects associated to output branches
1537 RBranchSet fOutputBranches;
1538 std::vector<bool> fIsDefine;
1539
1540public:
1541 using ColumnTypes_t = TypeList<ColTypes...>;
1542 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1543 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
1544 std::vector<bool> &&isDefine)
1545 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1546 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1547 fBranchAddresses(vbnames.size(), nullptr), fIsDefine(std::move(isDefine))
1548 {
1549 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1550 }
1551
1552 SnapshotHelper(const SnapshotHelper &) = delete;
1553 SnapshotHelper(SnapshotHelper &&) = default;
1554 ~SnapshotHelper()
1555 {
1556 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy)
1557 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1558 }
1559
1560 void InitTask(TTreeReader *r, unsigned int /* slot */)
1561 {
1562 if (r)
1563 fInputTree = r->GetTree();
1564 fBranchAddressesNeedReset = true;
1565 }
1566
1567 void Exec(unsigned int /* slot */, ColTypes &... values)
1568 {
1569 using ind_t = std::index_sequence_for<ColTypes...>;
1570 if (!fBranchAddressesNeedReset) {
1571 UpdateCArraysPtrs(values..., ind_t{});
1572 } else {
1573 SetBranches(values..., ind_t{});
1574 fBranchAddressesNeedReset = false;
1575 }
1576 fOutputTree->Fill();
1577 }
1578
1579 template <std::size_t... S>
1580 void UpdateCArraysPtrs(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1581 {
1582 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1583 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1584 // leaving associated to the branch of the output tree an invalid pointer.
1585 // With this code, we set the value of the pointer in the output branch anew when needed.
1586 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1587 // we need an int for the expander list.
1588 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1589 ? fBranches[S]->SetAddress(GetData(values)),
1590 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1591 0};
1592 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1593 }
1594
1595 template <std::size_t... S>
1596 void SetBranches(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1597 {
1598 // create branches in output tree
1599 int expander[] = {(SetBranchesHelper(fInputTree, *fOutputTree, fInputBranchNames[S], fOutputBranchNames[S],
1600 fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S]),
1601 0)...,
1602 0};
1603 fOutputBranches.AssertNoNullBranchAddresses();
1604 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1605 }
1606
1607 void Initialize()
1608 {
1609 fOutputFile.reset(
1610 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1612 if(!fOutputFile)
1613 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1614
1615 TDirectory *outputDir = fOutputFile.get();
1616 if (!fDirName.empty()) {
1617 TString checkupdate = fOptions.fMode;
1618 checkupdate.ToLower();
1619 if (checkupdate == "update")
1620 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
1621 else
1622 outputDir = fOutputFile->mkdir(fDirName.c_str());
1623 }
1624
1625 fOutputTree =
1626 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
1627
1628 if (fOptions.fAutoFlush)
1629 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1630 }
1631
1632 void Finalize()
1633 {
1634 assert(fOutputTree != nullptr);
1635 assert(fOutputFile != nullptr);
1636
1637 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1638 fOutputTree->AutoSave("flushbaskets");
1639 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1640 fOutputTree.reset();
1641 fOutputFile->Close();
1642 }
1643
1644 std::string GetActionName() { return "Snapshot"; }
1645
1646 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1647 {
1648 return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
1649 }
1650};
1651
1652/// Helper object for a multi-thread Snapshot action
1653template <typename... ColTypes>
1654class R__CLING_PTRCHECK(off) SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<ColTypes...>> {
1655 unsigned int fNSlots;
1656 std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1657 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
1658 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1659 std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
1660 std::string fFileName; // name of the output file name
1661 std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1662 std::string fTreeName; // name of output tree
1663 RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1664 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1665 ColumnNames_t fOutputBranchNames;
1666 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1667 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1668 std::vector<std::vector<TBranch *>> fBranches;
1669 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1670 std::vector<std::vector<void *>> fBranchAddresses;
1671 std::vector<RBranchSet> fOutputBranches;
1672 std::vector<bool> fIsDefine;
1673
1674public:
1675 using ColumnTypes_t = TypeList<ColTypes...>;
1676 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1677 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1678 const RSnapshotOptions &options, std::vector<bool> &&isDefine)
1679 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fBranchAddressesNeedReset(fNSlots, 1),
1680 fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1681 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots),
1682 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1683 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr)), fOutputBranches(fNSlots),
1684 fIsDefine(std::move(isDefine))
1685 {
1686 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1687 }
1688 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1689 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1690 ~SnapshotHelperMT()
1691 {
1692 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy &&
1693 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */)
1694 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1695 }
1696
1697 void InitTask(TTreeReader *r, unsigned int slot)
1698 {
1699 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1700 if (!fOutputFiles[slot]) {
1701 // first time this thread executes something, let's create a TBufferMerger output directory
1702 fOutputFiles[slot] = fMerger->GetFile();
1703 }
1704 TDirectory *treeDirectory = fOutputFiles[slot].get();
1705 if (!fDirName.empty()) {
1706 // call returnExistingDirectory=true since MT can end up making this call multiple times
1707 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1708 }
1709 // re-create output tree as we need to create its branches again, with new input variables
1710 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1711 fOutputTrees[slot] =
1712 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1713 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
1714 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1715 fOutputTrees[slot]->SetImplicitMT(false);
1716 if (fOptions.fAutoFlush)
1717 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1718 if (r) {
1719 // not an empty-source RDF
1720 fInputTrees[slot] = r->GetTree();
1721 }
1722 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
1723 }
1724
1725 void FinalizeTask(unsigned int slot)
1726 {
1727 if (fOutputTrees[slot]->GetEntries() > 0)
1728 fOutputFiles[slot]->Write();
1729 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1730 fOutputTrees[slot].reset(nullptr);
1731 fOutputBranches[slot].Clear();
1732 }
1733
1734 void Exec(unsigned int slot, ColTypes &... values)
1735 {
1736 using ind_t = std::index_sequence_for<ColTypes...>;
1737 if (fBranchAddressesNeedReset[slot] == 0) {
1738 UpdateCArraysPtrs(slot, values..., ind_t{});
1739 } else {
1740 SetBranches(slot, values..., ind_t{});
1741 fBranchAddressesNeedReset[slot] = 0;
1742 }
1743 fOutputTrees[slot]->Fill();
1744 auto entries = fOutputTrees[slot]->GetEntries();
1745 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1746 if ((autoFlush > 0) && (entries % autoFlush == 0))
1747 fOutputFiles[slot]->Write();
1748 }
1749
1750 template <std::size_t... S>
1751 void UpdateCArraysPtrs(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1752 {
1753 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1754 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1755 // leaving associated to the branch of the output tree an invalid pointer.
1756 // With this code, we set the value of the pointer in the output branch anew when needed.
1757 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1758 // we need an int for the expander list.
1759 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1760 ? fBranches[slot][S]->SetAddress(GetData(values)),
1761 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1762 0};
1763 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1764 }
1765
1766 template <std::size_t... S>
1767 void SetBranches(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1768 {
1769 // hack to call TTree::Branch on all variadic template arguments
1770 int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1771 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S],
1772 &values, fOutputBranches[slot], fIsDefine[S]),
1773 0)...,
1774 0};
1775 fOutputBranches[slot].AssertNoNullBranchAddresses();
1776 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1777 }
1778
1779 void Initialize()
1780 {
1781 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1782 auto out_file = TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs);
1783 if(!out_file)
1784 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1785 fMerger = std::make_unique<ROOT::TBufferMerger>(std::unique_ptr<TFile>(out_file));
1786 }
1787
1788 void Finalize()
1789 {
1790 assert(std::any_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &ptr) { return ptr != nullptr; }));
1791
1792 auto fileWritten = false;
1793 for (auto &file : fOutputFiles) {
1794 if (file) {
1795 file->Write();
1796 file->Close();
1797 fileWritten = true;
1798 }
1799 }
1800
1801 if (!fileWritten) {
1802 Warning("Snapshot",
1803 "No input entries (input TTree was empty or no entry passed the Filters). Output TTree is empty.");
1804 }
1805
1806 // flush all buffers to disk by destroying the TBufferMerger
1807 fOutputFiles.clear();
1808 fMerger.reset();
1809 }
1810
/// Name of the action implemented by this helper.
std::string GetActionName()
{
   return std::string("Snapshot");
}
1812
1813 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1814 {
1815 return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
1816 }
1817};
1818
1819template <typename Acc, typename Merge, typename R, typename T, typename U,
1820 bool MustCopyAssign = std::is_same<R, U>::value>
1821class R__CLING_PTRCHECK(off) AggregateHelper
1822 : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1823 Acc fAggregate;
1824 Merge fMerge;
1825 std::shared_ptr<U> fResult;
1826 Results<U> fAggregators;
1827
1828public:
1829 using ColumnTypes_t = TypeList<T>;
1830
1831 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1832 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1833 {
1834 }
1835
1836 AggregateHelper(Acc &f, Merge &m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1837 : fAggregate(f), fMerge(m), fResult(result), fAggregators(nSlots, *result)
1838 {
1839 }
1840
1841 AggregateHelper(AggregateHelper &&) = default;
1842 AggregateHelper(const AggregateHelper &) = delete;
1843
1844 void InitTask(TTreeReader *, unsigned int) {}
1845
1846 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<MustCopyAssign_, int> = 0>
1847 void Exec(unsigned int slot, const T &value)
1848 {
1849 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1850 }
1851
1852 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<!MustCopyAssign_, int> = 0>
1853 void Exec(unsigned int slot, const T &value)
1854 {
1855 fAggregate(fAggregators[slot], value);
1856 }
1857
1858 void Initialize() { /* noop */}
1859
1860 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1862 std::enable_if_t<MergeAll, void> Finalize()
1863 {
1864 fMerge(fAggregators);
1865 *fResult = fAggregators[0];
1866 }
1867
1868 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1869 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1870 std::enable_if_t<MergeTwoByTwo, void> Finalize(...) // ... needed to let compiler distinguish overloads
1871 {
1872 for (const auto &acc : fAggregators)
1873 *fResult = fMerge(*fResult, acc);
1874 }
1875
1876 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1877
1878 std::string GetActionName() { return "Aggregate"; }
1879
1880 AggregateHelper MakeNew(void *newResult)
1881 {
1882 auto &result = *static_cast<std::shared_ptr<U> *>(newResult);
1883 return AggregateHelper(fAggregate, fMerge, result, fAggregators.size());
1884 }
1885};
1886
1887} // end of NS RDF
1888} // end of NS Internal
1889} // end of NS ROOT
1890
1891/// \endcond
1892
1893#endif
PyObject * fCallable
Definition: CPPOverload.cxx:41
Handle_t Display_t
Display handle.
Definition: GuiTypes.h:27
#define d(i)
Definition: RSha256.hxx:102
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
double Double_t
Definition: RtypesCore.h:59
unsigned long long ULong64_t
Definition: RtypesCore.h:81
#define R__CLING_PTRCHECK(ONOFF)
Definition: Rtypes.h:498
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition: TError.cxx:232
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t b
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t g
char name[80]
Definition: TGX11.cxx:110
TClass * IsA() const override
Definition: TStringLong.h:20
TTime operator*(const TTime &t1, const TTime &t2)
Definition: TTime.h:85
Base class for action helpers, see RInterface::Book() for more information.
Definition: RActionImpl.hxx:26
pointer data() noexcept
Return a pointer to the vector's buffer, even if empty().
Definition: RVec.hxx:280
This class is the textual representation of the content of a columnar dataset.
Definition: RDisplay.hxx:64
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
Definition: RSampleInfo.hxx:35
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
Definition: TypeTraits.hxx:169
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:1478
A TTree is a list of TBranches.
Definition: TBranch.h:89
virtual const char * GetClassName() const
Return the name of the user class whose content is stored in this branch, if any.
Definition: TBranch.cxx:1324
virtual char * GetAddress() const
Definition: TBranch.h:208
static TClass * Class()
Int_t GetSplitLevel() const
Definition: TBranch.h:246
TClass * IsA() const override
Definition: TBranch.h:291
virtual Int_t GetBasketSize() const
Definition: TBranch.h:213
TObjArray * GetListOfLeaves()
Definition: TBranch.h:243
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:28
Collection abstract base class.
Definition: TCollection.h:65
TDirectory::TContext keeps track of and restores the current directory.
Definition: TDirectory.h:89
Describe directory structure in memory.
Definition: TDirectory.h:45
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:4053
TGraph with asymmetric error bars.
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:620
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
A TLeaf describes individual elements of a TBranch. See TBranch structure in TTree.
Definition: TLeaf.h:57
A doubly linked list.
Definition: TList.h:38
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
const char * GetName() const override
Returns name of object.
Definition: TNamed.h:47
const char * GetTitle() const override
Returns title of object.
Definition: TNamed.h:48
TObject * UncheckedAt(Int_t i) const
Definition: TObjArray.h:84
Statistical variable, defined by its mean and variance (RMS).
Definition: TStatistic.h:33
Basic string class.
Definition: TString.h:136
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1171
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:44
A TTree represents a columnar dataset.
Definition: TTree.h:79
virtual TBranch * FindBranch(const char *name)
Return the branch that corresponds to the path 'branchname', which can include the name of the tree or...
Definition: TTree.cxx:4809
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition: TTree.cxx:5262
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition: TTree.h:350
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition: TTree.h:258
RooCmdArg Columns(Int_t ncol)
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
basic_string_view< char > string_view
#define F(x, y, z)
#define H(x, y, z)
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition: API.cxx:333
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
Definition: RResultMap.hxx:174
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
Definition: RDFUtils.cxx:296
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:252
constexpr std::size_t FindIdxTrue(const T &arr)
Definition: Utils.hxx:229
void(off) SmallVectorTemplateBase< T
double T(double x)
Definition: ChebyshevPol.h:34
double inner_product(const LAVector &, const LAVector &)
std::vector< std::string > ColumnNames_t
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with a RDataFrame computation graph via e....
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
@ kROOTRVec
Definition: ESTLType.h:46
@ kSTLvector
Definition: ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
RooArgSet S(Args_t &&... args)
Definition: RooArgSet.h:240
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
Definition: TClassEdit.cxx:528
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Definition: file.py:1
static constexpr bool value
Definition: Utils.hxx:101
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
bool fLazy
Do not start the event loop when Snapshot is called.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25
TMarker m
Definition: textangle.C:8
TLine l
Definition: textangle.C:4