Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ActionHelpers.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RDF/ActionHelpers.hxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \date 2020-06
9*/
10
11/*************************************************************************
12 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
13 * All rights reserved. *
14 * *
15 * For the licensing terms see $ROOTSYS/LICENSE. *
16 * For the list of contributors see $ROOTSYS/README/CREDITS. *
17 *************************************************************************/
18
19#ifndef ROOT_RDFOPERATIONS
20#define ROOT_RDFOPERATIONS
21
22#include "Compression.h"
23#include <string_view>
24#include "ROOT/RVec.hxx"
25#include "ROOT/TBufferMerger.hxx" // for SnapshotTTreeHelper
28#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/TypeTraits.hxx"
31#include "ROOT/RDF/RDisplay.hxx"
32#include "RtypesCore.h"
33#include "TBranch.h"
34#include "TClassEdit.h"
35#include "TClassRef.h"
36#include "TDirectory.h"
37#include "TError.h" // for R__ASSERT, Warning
38#include "TFile.h" // for SnapshotTTreeHelper
39#include "TH1.h"
40#include "TGraph.h"
41#include "TGraphAsymmErrors.h"
42#include "TLeaf.h"
43#include "TObject.h"
44#include "TTree.h"
45#include "TTreeReader.h" // for SnapshotTTreeHelper
46#include "TStatistic.h"
50
51#include "ROOT/RNTupleDS.hxx"
52#include "ROOT/RNTupleWriter.hxx" // for SnapshotRNTupleHelper
53#include "ROOT/RTTreeDS.hxx"
54
55#include <algorithm>
56#include <functional>
57#include <limits>
58#include <memory>
59#include <stdexcept>
60#include <string>
61#include <type_traits>
62#include <utility> // std::index_sequence
63#include <vector>
64#include <iomanip>
65#include <numeric> // std::accumulate in MeanHelper
66
67/// \cond HIDDEN_SYMBOLS
68
69namespace ROOT {
70namespace Internal {
71namespace RDF {
72using namespace ROOT::TypeTraits;
73using namespace ROOT::VecOps;
74using namespace ROOT::RDF;
75using namespace ROOT::Detail::RDF;
76
77using Hist_t = ::TH1D;
78
79class RBranchSet {
80 std::vector<TBranch *> fBranches;
81 std::vector<std::string> fNames;
82
83public:
84 TBranch *Get(const std::string &name) const
85 {
86 auto it = std::find(fNames.begin(), fNames.end(), name);
87 if (it == fNames.end())
88 return nullptr;
89 return fBranches[std::distance(fNames.begin(), it)];
90 }
91
92 void Insert(const std::string &name, TBranch *address)
93 {
94 if (address == nullptr) {
95 throw std::logic_error("Trying to insert a null branch address.");
96 }
97 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
98 throw std::logic_error("Trying to insert a branch address that's already present.");
99 }
100 if (std::find(fNames.begin(), fNames.end(), name) != fNames.end()) {
101 throw std::logic_error("Trying to insert a branch name that's already present.");
102 }
103 fNames.emplace_back(name);
104 fBranches.emplace_back(address);
105 }
106
107 void Clear()
108 {
109 fBranches.clear();
110 fNames.clear();
111 }
112
114 {
115 std::vector<TBranch *> branchesWithNullAddress;
116 std::copy_if(fBranches.begin(), fBranches.end(), std::back_inserter(branchesWithNullAddress),
117 [](TBranch *b) { return b->GetAddress() == nullptr; });
118
119 if (branchesWithNullAddress.empty())
120 return;
121
122 // otherwise build error message and throw
123 std::vector<std::string> missingBranchNames;
125 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
126 std::string msg = "RDataFrame::Snapshot:";
127 if (missingBranchNames.size() == 1) {
128 msg += " branch " + missingBranchNames[0] +
129 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
130 "it is";
131 } else {
132 msg += " branches ";
133 for (const auto &bName : missingBranchNames)
134 msg += bName + ", ";
135 msg.resize(msg.size() - 2); // remove last ", "
136 msg +=
137 " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
138 }
139 msg += " not part of the set of branches that are being written out.";
140 throw std::runtime_error(msg);
141 }
142};
143
/// The container type for each thread's partial result in an action helper
// We have to avoid instantiating std::vector<bool>, as that makes it impossible to return a reference to one of
// the thread-local results. In addition, a common definition for the type of the container makes it easy to swap
// the type of the underlying container if e.g. we see problems with false sharing of the thread-local results.
template <typename T>
using Results =
   typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
150
151template <typename F>
152class R__CLING_PTRCHECK(off) ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
153 F fCallable;
154
155public:
157 ForeachSlotHelper(F &&f) : fCallable(f) {}
159 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
160
161 void InitTask(TTreeReader *, unsigned int) {}
162
163 template <typename... Args>
164 void Exec(unsigned int slot, Args &&... args)
165 {
166 // check that the decayed types of Args are the same as the branch types
167 static_assert(std::is_same<TypeList<std::decay_t<Args>...>, ColumnTypes_t>::value, "");
168 fCallable(slot, std::forward<Args>(args)...);
169 }
170
171 void Initialize() { /* noop */}
172
173 void Finalize() { /* noop */}
174
175 std::string GetActionName() { return "ForeachSlot"; }
176};
177
178class R__CLING_PTRCHECK(off) CountHelper : public RActionImpl<CountHelper> {
179 std::shared_ptr<ULong64_t> fResultCount;
180 Results<ULong64_t> fCounts;
181
182public:
183 using ColumnTypes_t = TypeList<>;
184 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
185 CountHelper(CountHelper &&) = default;
186 CountHelper(const CountHelper &) = delete;
187 void InitTask(TTreeReader *, unsigned int) {}
188 void Exec(unsigned int slot);
189 void Initialize() { /* noop */}
190 void Finalize();
191
192 // Helper functions for RMergeableValue
193 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
194 {
195 return std::make_unique<RMergeableCount>(*fResultCount);
196 }
197
198 ULong64_t &PartialUpdate(unsigned int slot);
199
200 std::string GetActionName() { return "Count"; }
201
202 CountHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
203 {
204 auto &result = *static_cast<std::shared_ptr<ULong64_t> *>(newResult);
205 return CountHelper(result, fCounts.size());
206 }
207};
208
209template <typename RNode_t>
210class R__CLING_PTRCHECK(off) ReportHelper : public RActionImpl<ReportHelper<RNode_t>> {
211 std::shared_ptr<RCutFlowReport> fReport;
212 /// Non-owning pointer, never null. As usual, the node is owned by its children nodes (and therefore indirectly by
213 /// the RAction corresponding to this action helper).
214 RNode_t *fNode;
216
217public:
218 using ColumnTypes_t = TypeList<>;
219 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, RNode_t *node, bool emptyRep)
220 : fReport(report), fNode(node), fReturnEmptyReport(emptyRep){};
221 ReportHelper(ReportHelper &&) = default;
222 ReportHelper(const ReportHelper &) = delete;
223 void InitTask(TTreeReader *, unsigned int) {}
224 void Exec(unsigned int /* slot */) {}
225 void Initialize() { /* noop */}
226 void Finalize()
227 {
229 fNode->Report(*fReport);
230 }
231
232 std::string GetActionName() { return "Report"; }
233
234 ReportHelper MakeNew(void *newResult, std::string_view variation = "nominal")
235 {
236 auto &&result = *static_cast<std::shared_ptr<RCutFlowReport> *>(newResult);
237 return ReportHelper{result,
238 std::static_pointer_cast<RNode_t>(fNode->GetVariedFilter(std::string(variation))).get(),
240 }
241};
242
243/// This helper fills TH1Ds for which no axes were specified by buffering the fill values to pick good axes limits.
244///
245/// TH1Ds have an automatic mechanism to pick good limits based on the first N entries they were filled with, but
246/// that does not work in multi-thread event loops as it might yield histograms with incompatible binning in each
247/// thread, making it impossible to merge the per-thread results.
248/// Instead, this helper delays the decision on the axes limits until all threads have done processing, synchronizing
249/// the decision on the limits as part of the merge operation.
250class R__CLING_PTRCHECK(off) BufferedFillHelper : public RActionImpl<BufferedFillHelper> {
251 // this sets a total initial size of 16 MB for the buffers (can increase)
252 static constexpr unsigned int fgTotalBufSize = 2097152;
253 using BufEl_t = double;
254 using Buf_t = std::vector<BufEl_t>;
255
256 std::vector<Buf_t> fBuffers;
257 std::vector<Buf_t> fWBuffers;
258 std::shared_ptr<Hist_t> fResultHist;
259 unsigned int fNSlots;
260 unsigned int fBufSize;
261 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
263 Buf_t fMin;
264 Buf_t fMax;
265
266 void UpdateMinMax(unsigned int slot, double v);
267
268public:
269 BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
271 BufferedFillHelper(const BufferedFillHelper &) = delete;
272 void InitTask(TTreeReader *, unsigned int) {}
273 void Exec(unsigned int slot, double v);
274 void Exec(unsigned int slot, double v, double w);
275
277 void Exec(unsigned int slot, const T &vs)
278 {
279 auto &thisBuf = fBuffers[slot];
280 // range-based for results in warnings on some compilers due to vector<bool>'s custom reference type
281 for (auto v = vs.begin(); v != vs.end(); ++v) {
283 thisBuf.emplace_back(*v); // TODO: Can be optimised in case T == BufEl_t
284 }
285 }
286
288 void Exec(unsigned int slot, const T &vs, const W &ws)
289 {
290 auto &thisBuf = fBuffers[slot];
291
292 for (auto &v : vs) {
294 thisBuf.emplace_back(v);
295 }
296
297 auto &thisWBuf = fWBuffers[slot];
298 for (auto &w : ws) {
299 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
300 }
301 }
302
304 void Exec(unsigned int slot, const T &vs, const W w)
305 {
306 auto &thisBuf = fBuffers[slot];
307 for (auto &v : vs) {
309 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
310 }
311
312 auto &thisWBuf = fWBuffers[slot];
313 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
314 }
315
317 void Exec(unsigned int slot, const T v, const W &ws)
318 {
320 auto &thisBuf = fBuffers[slot];
321 thisBuf.insert(thisBuf.end(), ws.size(), v);
322
323 auto &thisWBuf = fWBuffers[slot];
324 thisWBuf.insert(thisWBuf.end(), ws.begin(), ws.end());
325 }
326
327 Hist_t &PartialUpdate(unsigned int);
328
329 void Initialize() { /* noop */}
330
331 void Finalize();
332
333 // Helper functions for RMergeableValue
334 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
335 {
336 return std::make_unique<RMergeableFill<Hist_t>>(*fResultHist);
337 }
338
339 std::string GetActionName()
340 {
341 return std::string(fResultHist->IsA()->GetName()) + "\\n" + std::string(fResultHist->GetName());
342 }
343
344 BufferedFillHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
345 {
346 auto &result = *static_cast<std::shared_ptr<Hist_t> *>(newResult);
347 result->Reset();
348 result->SetDirectory(nullptr);
349 return BufferedFillHelper(result, fNSlots);
350 }
351};
352
353/// The generic Fill helper: it calls Fill on per-thread objects and then Merge to produce a final result.
354/// For one-dimensional histograms, if no axes are specified, RDataFrame uses BufferedFillHelper instead.
355template <typename HIST = Hist_t>
356class R__CLING_PTRCHECK(off) FillHelper : public RActionImpl<FillHelper<HIST>> {
357 std::vector<HIST *> fObjects;
358
360 void ResetIfPossible(H *h)
361 {
362 h->Reset();
363 }
364
365 void ResetIfPossible(TStatistic *h) { *h = TStatistic(); }
366
367 // cannot safely re-initialize variations of the result, hence error out
368 void ResetIfPossible(...)
369 {
370 throw std::runtime_error(
371 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
372 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
373 }
374
376 h->SetDirectory(nullptr);
377 }
378
379 void UnsetDirectoryIfPossible(...) {}
380
381 // Merge overload for types with Merge(TCollection*), like TH1s
383 auto Merge(std::vector<H *> &objs, int /*toincreaseoverloadpriority*/)
384 -> decltype(objs[0]->Merge((TCollection *)nullptr), void())
385 {
386 TList l;
387 for (auto it = ++objs.begin(); it != objs.end(); ++it)
388 l.Add(*it);
389 objs[0]->Merge(&l);
390 }
391
392 // Merge overload for types with Merge(const std::vector&)
393 template <typename H>
394 auto Merge(std::vector<H *> &objs, double /*toloweroverloadpriority*/)
395 -> decltype(objs[0]->Merge(std::vector<HIST *>{}), void())
396 {
397 objs[0]->Merge({++objs.begin(), objs.end()});
398 }
399
400 // Merge overload to error out in case no valid HIST::Merge method was detected
401 template <typename T>
402 void Merge(T, ...)
403 {
404 static_assert(sizeof(T) < 0,
405 "The type passed to Fill does not provide a Merge(TCollection*) or Merge(const std::vector&) method.");
406 }
407
// Iterator-like wrapper around a pointer to a single value: dereferencing yields the value,
// incrementing does nothing, so a scalar can be stepped through in lockstep with real iterators.
template <typename T>
class ScalarConstIterator {
   const T *fScalar;

public:
   ScalarConstIterator(const T *obj) : fScalar(obj) {}

   const T &operator*() const
   {
      return *fScalar;
   }

   // no-op pre-increment
   ScalarConstIterator<T> &operator++()
   {
      return *this;
   }
};
418
419 // helper functions which provide one implementation for scalar types and another for containers
420 // TODO these could probably all be replaced by inlined lambdas and/or constexpr if statements
421 // in c++17 or later
422
423 // return unchanged value for scalar
426 {
427 return ScalarConstIterator<T>(&val);
428 }
429
430 // return iterator to beginning of container
432 auto MakeBegin(const T &val)
433 {
434 return std::begin(val);
435 }
436
437 // return 1 for scalars
439 std::size_t GetSize(const T &)
440 {
441 return 1;
442 }
443
444 // return container size
446 std::size_t GetSize(const T &val)
447 {
448#if __cplusplus >= 201703L
449 return std::size(val);
450#else
451 return val.size();
452#endif
453 }
454
455 template <std::size_t ColIdx, typename End_t, typename... Its>
456 void ExecLoop(unsigned int slot, End_t end, Its... its)
457 {
458 auto *thisSlotH = fObjects[slot];
459 // loop increments all of the iterators while leaving scalars unmodified
460 // TODO this could be simplified with fold expressions or std::apply in C++17
461 auto nop = [](auto &&...) {};
462 for (; its...)) {
463 thisSlotH->Fill(*its...);
464 }
465 }
466
467public:
468 FillHelper(FillHelper &&) = default;
469 FillHelper(const FillHelper &) = delete;
470
471 FillHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
472 {
473 fObjects[0] = h.get();
474 // Initialize all other slots
475 for (unsigned int i = 1; i < nSlots; ++i) {
476 fObjects[i] = new HIST(*fObjects[0]);
477 UnsetDirectoryIfPossible(fObjects[i]);
478 }
479 }
480
481 void InitTask(TTreeReader *, unsigned int) {}
482
483 // no container arguments
484 template <typename... ValTypes, std::enable_if_t<!Disjunction<IsDataContainer<ValTypes>...>::value, int> = 0>
485 auto Exec(unsigned int slot, const ValTypes &...x) -> decltype(fObjects[slot]->Fill(x...), void())
486 {
487 fObjects[slot]->Fill(x...);
488 }
489
490 // at least one container argument
491 template <typename... Xs, std::enable_if_t<Disjunction<IsDataContainer<Xs>...>::value, int> = 0>
492 auto Exec(unsigned int slot, const Xs &...xs) -> decltype(fObjects[slot]->Fill(*MakeBegin(xs)...), void())
493 {
494 // array of bools keeping track of which inputs are containers
495 constexpr std::array<bool, sizeof...(Xs)> isContainer{IsDataContainer<Xs>::value...};
496
497 // index of the first container input
498 constexpr std::size_t colidx = FindIdxTrue(isContainer);
499 // if this happens, there is a bug in the implementation
500 static_assert(colidx < sizeof...(Xs), "Error: index of collection-type argument not found.");
501
502 // get the end iterator to the first container
503 auto const xrefend = std::end(GetNthElement<colidx>(xs...));
504
505 // array of container sizes (1 for scalars)
506 std::array<std::size_t, sizeof...(xs)> sizes = {{GetSize(xs)...}};
507
508 for (std::size_t i = 0; i < sizeof...(xs); ++i) {
509 if (isContainer[i] && sizes[i] != sizes[colidx]) {
510 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
511 }
512 }
513
515 }
516
517 template <typename T = HIST>
518 void Exec(...)
519 {
520 static_assert(sizeof(T) < 0,
521 "When filling an object with RDataFrame (e.g. via a Fill action) the number or types of the "
522 "columns passed did not match the signature of the object's `Fill` method.");
523 }
524
525 void Initialize() { /* noop */}
526
527 void Finalize()
528 {
529 if (fObjects.size() == 1)
530 return;
531
532 Merge(fObjects, /*toselectcorrectoverload=*/0);
533
534 // delete the copies we created for the slots other than the first
535 for (auto it = ++fObjects.begin(); it != fObjects.end(); ++it)
536 delete *it;
537 }
538
539 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
540
541 // Helper functions for RMergeableValue
542 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
543 {
544 return std::make_unique<RMergeableFill<HIST>>(*fObjects[0]);
545 }
546
547 // if the fObjects vector type is derived from TObject, return the name of the object
549 std::string GetActionName()
550 {
551 return std::string(fObjects[0]->IsA()->GetName()) + "\\n" + std::string(fObjects[0]->GetName());
552 }
553
554 // if fObjects is not derived from TObject, indicate it is some other object
556 std::string GetActionName()
557 {
558 return "Fill custom object";
559 }
560
561 template <typename H = HIST>
562 FillHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
563 {
564 auto &result = *static_cast<std::shared_ptr<H> *>(newResult);
565 ResetIfPossible(result.get());
567 return FillHelper(result, fObjects.size());
568 }
569};
570
572public:
573 using Result_t = ::TGraph;
574
575private:
576 std::vector<::TGraph *> fGraphs;
577
578public:
580 FillTGraphHelper(const FillTGraphHelper &) = delete;
581
582 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
583 {
584 fGraphs[0] = g.get();
585 // Initialize all other slots
586 for (unsigned int i = 1; i < nSlots; ++i) {
587 fGraphs[i] = new TGraph(*fGraphs[0]);
588 }
589 }
590
591 void Initialize() {}
592 void InitTask(TTreeReader *, unsigned int) {}
593
594 // case: both types are container types
595 template <typename X0, typename X1,
596 std::enable_if_t<IsDataContainer<X0>::value && IsDataContainer<X1>::value, int> = 0>
597 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
598 {
599 if (x0s.size() != x1s.size()) {
600 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
601 }
602 auto *thisSlotG = fGraphs[slot];
603 auto x0sIt = std::begin(x0s);
604 const auto x0sEnd = std::end(x0s);
605 auto x1sIt = std::begin(x1s);
606 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
607 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
608 }
609 }
610
611 // case: both types are non-container types, e.g. scalars
612 template <typename X0, typename X1,
613 std::enable_if_t<!IsDataContainer<X0>::value && !IsDataContainer<X1>::value, int> = 0>
614 void Exec(unsigned int slot, X0 x0, X1 x1)
615 {
616 auto thisSlotG = fGraphs[slot];
617 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
618 }
619
620 // case: types are combination of containers and non-containers
621 // this is not supported, error out
622 template <typename X0, typename X1, typename... ExtraArgsToLowerPriority>
623 void Exec(unsigned int, X0, X1, ExtraArgsToLowerPriority...)
624 {
625 throw std::runtime_error("Graph was applied to a mix of scalar values and collections. This is not supported.");
626 }
627
628 void Finalize()
629 {
630 const auto nSlots = fGraphs.size();
631 auto resGraph = fGraphs[0];
632 TList l;
633 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
634 for (unsigned int slot = 1; slot < nSlots; ++slot) {
635 l.Add(fGraphs[slot]);
636 }
637 resGraph->Merge(&l);
638 }
639
640 // Helper functions for RMergeableValue
641 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
642 {
643 return std::make_unique<RMergeableFill<Result_t>>(*fGraphs[0]);
644 }
645
646 std::string GetActionName() { return "Graph"; }
647
648 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
649
650 FillTGraphHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
651 {
652 auto &result = *static_cast<std::shared_ptr<TGraph> *>(newResult);
653 result->Set(0);
654 return FillTGraphHelper(result, fGraphs.size());
655 }
656};
657
659 : public ROOT::Detail::RDF::RActionImpl<FillTGraphAsymmErrorsHelper> {
660public:
661 using Result_t = ::TGraphAsymmErrors;
662
663private:
664 std::vector<::TGraphAsymmErrors *> fGraphAsymmErrors;
665
666public:
669
670 FillTGraphAsymmErrorsHelper(const std::shared_ptr<::TGraphAsymmErrors> &g, const unsigned int nSlots)
671 : fGraphAsymmErrors(nSlots, nullptr)
672 {
673 fGraphAsymmErrors[0] = g.get();
674 // Initialize all other slots
675 for (unsigned int i = 1; i < nSlots; ++i) {
677 }
678 }
679
680 void Initialize() {}
681 void InitTask(TTreeReader *, unsigned int) {}
682
683 // case: all types are container types
684 template <
685 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
686 std::enable_if_t<IsDataContainer<X>::value && IsDataContainer<Y>::value && IsDataContainer<EXL>::value &&
687 IsDataContainer<EXH>::value && IsDataContainer<EYL>::value && IsDataContainer<EYH>::value,
688 int> = 0>
689 void
690 Exec(unsigned int slot, const X &xs, const Y &ys, const EXL &exls, const EXH &exhs, const EYL &eyls, const EYH &eyhs)
691 {
692 if ((xs.size() != ys.size()) || (xs.size() != exls.size()) || (xs.size() != exhs.size()) ||
693 (xs.size() != eyls.size()) || (xs.size() != eyhs.size())) {
694 throw std::runtime_error("Cannot fill GraphAsymmErrors with values in containers of different sizes.");
695 }
697 auto xsIt = std::begin(xs);
698 auto ysIt = std::begin(ys);
699 auto exlsIt = std::begin(exls);
700 auto exhsIt = std::begin(exhs);
701 auto eylsIt = std::begin(eyls);
702 auto eyhsIt = std::begin(eyhs);
703 while (xsIt != std::end(xs)) {
704 const auto n = thisSlotG->GetN(); // must use the same `n` for SetPoint and SetPointError
705 thisSlotG->SetPoint(n, *xsIt++, *ysIt++);
706 thisSlotG->SetPointError(n, *exlsIt++, *exhsIt++, *eylsIt++, *eyhsIt++);
707 }
708 }
709
710 // case: all types are non-container types, e.g. scalars
711 template <
712 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
713 std::enable_if_t<!IsDataContainer<X>::value && !IsDataContainer<Y>::value && !IsDataContainer<EXL>::value &&
714 !IsDataContainer<EXH>::value && !IsDataContainer<EYL>::value && !IsDataContainer<EYH>::value,
715 int> = 0>
716 void Exec(unsigned int slot, X x, Y y, EXL exl, EXH exh, EYL eyl, EYH eyh)
717 {
719 const auto n = thisSlotG->GetN();
720 thisSlotG->SetPoint(n, x, y);
721 thisSlotG->SetPointError(n, exl, exh, eyl, eyh);
722 }
723
724 // case: types are combination of containers and non-containers
725 // this is not supported, error out
726 template <typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
727 typename... ExtraArgsToLowerPriority>
728 void Exec(unsigned int, X, Y, EXL, EXH, EYL, EYH, ExtraArgsToLowerPriority...)
729 {
730 throw std::runtime_error(
731 "GraphAsymmErrors was applied to a mix of scalar values and collections. This is not supported.");
732 }
733
734 void Finalize()
735 {
736 const auto nSlots = fGraphAsymmErrors.size();
738 TList l;
739 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
740 for (unsigned int slot = 1; slot < nSlots; ++slot) {
742 }
743 resGraphAsymmErrors->Merge(&l);
744 }
745
746 // Helper functions for RMergeableValue
747 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
748 {
749 return std::make_unique<RMergeableFill<Result_t>>(*fGraphAsymmErrors[0]);
750 }
751
752 std::string GetActionName() { return "GraphAsymmErrors"; }
753
754 Result_t &PartialUpdate(unsigned int slot) { return *fGraphAsymmErrors[slot]; }
755
756 FillTGraphAsymmErrorsHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
757 {
758 auto &result = *static_cast<std::shared_ptr<TGraphAsymmErrors> *>(newResult);
759 result->Set(0);
761 }
762};
763
764// In case of the take helper we have 4 cases:
765// 1. The column is not an RVec, the collection is not a vector
766// 2. The column is not an RVec, the collection is a vector
767// 3. The column is an RVec, the collection is not a vector
768// 4. The column is an RVec, the collection is a vector
769
/// Append a value to a collection, constructing the new element in place.
/// The value is perfectly forwarded: lvalues are copied, rvalues are moved (so move-only types work too).
/// \param[in] v Value to append.
/// \param[in,out] c Collection providing `emplace_back`.
template <typename V, typename COLL>
void FillColl(V&& v, COLL& c) {
   c.emplace_back(std::forward<V>(v));
}

// Use push_back for bool since some compilers do not support emplace_back.
template <typename COLL>
void FillColl(bool v, COLL& c) {
   c.push_back(v);
}
780
781// Case 1.: The column is not an RVec, the collection is not a vector
782// No optimisations, no transformations: just copies.
783template <typename RealT_t, typename T, typename COLL>
784class R__CLING_PTRCHECK(off) TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
786
787public:
788 using ColumnTypes_t = TypeList<T>;
789 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
790 {
791 fColls.emplace_back(resultColl);
792 for (unsigned int i = 1; i < nSlots; ++i)
793 fColls.emplace_back(std::make_shared<COLL>());
794 }
796 TakeHelper(const TakeHelper &) = delete;
797
798 void InitTask(TTreeReader *, unsigned int) {}
799
800 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
801
802 void Initialize() { /* noop */}
803
804 void Finalize()
805 {
806 auto rColl = fColls[0];
807 for (unsigned int i = 1; i < fColls.size(); ++i) {
808 const auto &coll = fColls[i];
809 const auto end();
810 // Use an explicit loop here to prevent compiler warnings introduced by
811 // clang's range-based loop analysis and vector<bool> references.
812 for (auto j++) {
813 FillColl(*j, *rColl);
814 }
815 }
816 }
817
818 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
819
820 std::string GetActionName() { return "Take"; }
821
822 TakeHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
823 {
824 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
825 result->clear();
826 return TakeHelper(result, fColls.size());
827 }
828};
829
830// Case 2.: The column is not an RVec, the collection is a vector
831// Optimisations, no transformations: just copies.
832template <typename RealT_t, typename T>
833class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, T, std::vector<T>>
834 : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
836
837public:
838 using ColumnTypes_t = TypeList<T>;
839 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
840 {
841 fColls.emplace_back(resultColl);
842 for (unsigned int i = 1; i < nSlots; ++i) {
843 auto v = std::make_shared<std::vector<T>>();
844 v->reserve(1024);
845 fColls.emplace_back(v);
846 }
847 }
849 TakeHelper(const TakeHelper &) = delete;
850
851 void InitTask(TTreeReader *, unsigned int) {}
852
853 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
854
855 void Initialize() { /* noop */}
856
857 // This is optimised to treat vectors
858 void Finalize()
859 {
860 ULong64_t totSize = 0;
861 for (auto &coll : fColls)
862 totSize += coll->size();
863 auto rColl = fColls[0];
864 rColl->reserve(totSize);
865 for (unsigned int i = 1; i < fColls.size(); ++i) {
866 auto &coll = fColls[i];
867 rColl->insert(rColl->end(), coll->begin(), coll->end());
868 }
869 }
870
871 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
872
873 std::string GetActionName() { return "Take"; }
874
875 TakeHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
876 {
877 auto &result = *static_cast<std::shared_ptr<std::vector<T>> *>(newResult);
878 result->clear();
879 return TakeHelper(result, fColls.size());
880 }
881};
882
883// Case 3.: The column is a RVec, the collection is not a vector
884// No optimisations, transformations from RVecs to vectors
885template <typename RealT_t, typename COLL>
887 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
889
890public:
891 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
892 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
893 {
894 fColls.emplace_back(resultColl);
895 for (unsigned int i = 1; i < nSlots; ++i)
896 fColls.emplace_back(std::make_shared<COLL>());
897 }
899 TakeHelper(const TakeHelper &) = delete;
900
901 void InitTask(TTreeReader *, unsigned int) {}
902
903 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
904
905 void Initialize() { /* noop */}
906
907 void Finalize()
908 {
909 auto rColl = fColls[0];
910 for (unsigned int i = 1; i < fColls.size(); ++i) {
911 auto &coll = fColls[i];
912 for (auto &v : *coll) {
913 rColl->emplace_back(v);
914 }
915 }
916 }
917
918 std::string GetActionName() { return "Take"; }
919
920 TakeHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
921 {
922 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
923 result->clear();
924 return TakeHelper(result, fColls.size());
925 }
926};
927
928// Case 4.: The column is an RVec, the collection is a vector
929// Optimisations, transformations from RVecs to vectors
930template <typename RealT_t>
931class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
932 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
933
935
936public:
937 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
938 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
939 {
940 fColls.emplace_back(resultColl);
941 for (unsigned int i = 1; i < nSlots; ++i) {
942 auto v = std::make_shared<std::vector<RealT_t>>();
943 v->reserve(1024);
944 fColls.emplace_back(v);
945 }
946 }
948 TakeHelper(const TakeHelper &) = delete;
949
950 void InitTask(TTreeReader *, unsigned int) {}
951
952 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
953
954 void Initialize() { /* noop */}
955
956 // This is optimised to treat vectors
957 void Finalize()
958 {
959 ULong64_t totSize = 0;
960 for (auto &coll : fColls)
961 totSize += coll->size();
962 auto rColl = fColls[0];
963 rColl->reserve(totSize);
964 for (unsigned int i = 1; i < fColls.size(); ++i) {
965 auto &coll = fColls[i];
966 rColl->insert(rColl->end(), coll->begin(), coll->end());
967 }
968 }
969
970 std::string GetActionName() { return "Take"; }
971
972 TakeHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
973 {
974 auto &result = *static_cast<typename decltype(fColls)::value_type *>(newResult);
975 result->clear();
976 return TakeHelper(result, fColls.size());
977 }
978};
979
980// Extern templates for TakeHelper
981// NOTE: The move-constructor of specializations declared as extern templates
982// must be defined out of line, otherwise cling fails to find its symbol.
// NOTE(review): the first three template headers below are not followed by a declarator in this listing;
// presumably each introduces an out-of-line defaulted move constructor like the last one -- confirm.
983template <typename RealT_t, typename T, typename COLL>
985template <typename RealT_t, typename T>
987template <typename RealT_t, typename COLL>
// Out-of-line defaulted move constructor of the RVec-column / std::vector-collection specialisation.
989template <typename RealT_t>
990TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
991
992// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
// The explicit instantiation declarations below suppress implicit instantiation of these TakeHelper
// specialisations in translation units that include this header.
993#if __GNUC__ > 5
994extern template class TakeHelper<bool, bool, std::vector<bool>>;
998extern template class TakeHelper<int, int, std::vector<int>>;
999extern template class TakeHelper<long, long, std::vector<long>>;
1001extern template class TakeHelper<float, float, std::vector<float>>;
1003#endif
1004
// Action helper computing the minimum of the values it is given. Each slot keeps its own running minimum in
// fMins; Finalize() reduces the per-slot minima into the object pointed to by fResultMin.
1005template <typename ResultType>
1006class R__CLING_PTRCHECK(off) MinHelper : public RActionImpl<MinHelper<ResultType>> {
 // Final result, shared with the caller that constructed the helper.
1007 std::shared_ptr<ResultType> fResultMin;
 // NOTE(review): fMins is used below but its declaration is not visible here; presumably a per-slot container of
 // ResultType -- confirm.
1009
1010public:
1011 MinHelper(MinHelper &&) = default;
 // Every per-slot minimum starts at numeric_limits<ResultType>::max(), so any processed value replaces it
 // unless it is larger.
1012 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
1013 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
1014 {
1015 }
1016
 // Scalar overload: fold `v` into the running minimum of `slot`.
1017 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
1018
1019 void InitTask(TTreeReader *, unsigned int) {}
1020
 // Range overload: fold every element of `vs`, converted to ResultType, into the running minimum of `slot`.
 // NOTE(review): the template header introducing `T` is not visible in this listing -- confirm its constraints.
1022 void Exec(unsigned int slot, const T &vs)
1023 {
1024 for (auto &&v : vs)
1025 fMins[slot] = std::min(static_cast<ResultType>(v), fMins[slot]);
1026 }
1027
1028 void Initialize() { /* noop */}
1029
 // Resets the result to numeric_limits<ResultType>::max() and then takes the minimum over all slots.
1030 void Finalize()
1031 {
1032 *fResultMin = std::numeric_limits<ResultType>::max();
1033 for (auto &m : fMins)
1034 *fResultMin = std::min(m, *fResultMin);
1035 }
1036
1037 // Helper functions for RMergeableValue
 // Wraps the current value of *fResultMin in an RMergeableMin.
1038 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1039 {
1040 return std::make_unique<RMergeableMin<ResultType>>(*fResultMin);
1041 }
1042
 // Gives access to the running minimum of `slot`.
1043 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
1044
1045 std::string GetActionName() { return "Min"; }
1046
 // `newResult` must point to a std::shared_ptr<ResultType>; it becomes the result of a new helper with the same
 // number of slots as this one.
1047 MinHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
1048 {
1049 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1050 return MinHelper(result, fMins.size());
1051 }
1052};
1053
// Action helper computing the maximum of the values it is given. Each slot keeps its own running maximum in
// fMaxs; Finalize() reduces the per-slot maxima into the object pointed to by fResultMax.
1054template <typename ResultType>
1055class R__CLING_PTRCHECK(off) MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
 // Final result, shared with the caller that constructed the helper.
1056 std::shared_ptr<ResultType> fResultMax;
 // NOTE(review): fMaxs is used below but its declaration is not visible here; presumably a per-slot container of
 // ResultType -- confirm.
1058
1059public:
1060 MaxHelper(MaxHelper &&) = default;
1061 MaxHelper(const MaxHelper &) = delete;
 // Every per-slot maximum starts at numeric_limits<ResultType>::lowest(), so any processed value replaces it
 // unless it is smaller.
1062 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
1063 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
1064 {
1065 }
1066
1067 void InitTask(TTreeReader *, unsigned int) {}
 // Scalar overload: fold `v` into the running maximum of `slot`.
1068 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
1069
 // Range overload: fold every element of `vs`, converted to ResultType, into the running maximum of `slot`.
 // NOTE(review): the template header introducing `T` is not visible in this listing -- confirm its constraints.
1071 void Exec(unsigned int slot, const T &vs)
1072 {
1073 for (auto &&v : vs)
1074 fMaxs[slot] = std::max(static_cast<ResultType>(v), fMaxs[slot]);
1075 }
1076
1077 void Initialize() { /* noop */}
1078
 // Resets the result to numeric_limits<ResultType>::lowest() and then takes the maximum over all slots.
1079 void Finalize()
1080 {
1081 *fResultMax = std::numeric_limits<ResultType>::lowest();
1082 for (auto &m : fMaxs) {
1083 *fResultMax = std::max(m, *fResultMax);
1084 }
1085 }
1086
1087 // Helper functions for RMergeableValue
 // Wraps the current value of *fResultMax in an RMergeableMax.
1088 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1089 {
1090 return std::make_unique<RMergeableMax<ResultType>>(*fResultMax);
1091 }
1092
 // Gives access to the running maximum of `slot`.
1093 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
1094
1095 std::string GetActionName() { return "Max"; }
1096
 // `newResult` must point to a std::shared_ptr<ResultType>; it becomes the result of a new helper with the same
 // number of slots as this one.
1097 MaxHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
1098 {
1099 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1100 return MaxHelper(result, fMaxs.size());
1101 }
1102};
1103
// Action helper summing the values it is given. Each slot accumulates into its own entry of fSums using a
// compensated (Kahan) summation; Finalize() combines the per-slot sums and adds the total to *fResultSum.
1104template <typename ResultType>
1105class R__CLING_PTRCHECK(off) SumHelper : public RActionImpl<SumHelper<ResultType>> {
 // Final result, shared with the caller that constructed the helper.
1106 std::shared_ptr<ResultType> fResultSum;
 // NOTE(review): fSums and fCompensations are used below but their declarations are not visible here; presumably
 // per-slot containers of ResultType -- confirm.
1109
1110 /// Evaluate neutral element for this type and the sum operation.
1111 /// This is assumed to be any_value - any_value if operator- is defined
1112 /// for the type, otherwise a default-constructed ResultType{} is used.
1113 template <typename T = ResultType>
1114 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
1115 {
1116 return v - v;
1117 }
1118
1119 template <typename T = ResultType, typename Dummy = int>
1120 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
1121 {
1122 return ResultType{};
1123 }
1124
1125public:
1126 SumHelper(SumHelper &&) = default;
1127 SumHelper(const SumHelper &) = delete;
 // NOTE(review): the member initialiser list of this constructor is not visible in this listing.
1128 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
1131 {
1132 }
1133 void InitTask(TTreeReader *, unsigned int) {}
1134
 // Scalar overload: add `x` to the running sum of `slot`, updating the compensation term of that slot.
1135 void Exec(unsigned int slot, ResultType x)
1136 {
1137 // Kahan Sum:
 // NOTE(review): the statement defining `y` is not visible here; presumably `x` corrected by the slot's
 // compensation term -- confirm.
1139 ResultType t = fSums[slot] + y;
1140 fCompensations[slot] = (t - fSums[slot]) - y;
1141 fSums[slot] = t;
1142 }
1143
 // Range overload: feed every element of `vs` to the scalar overload.
 // NOTE(review): the template header introducing `T` is not visible in this listing -- confirm its constraints.
1145 void Exec(unsigned int slot, const T &vs)
1146 {
1147 for (auto &&v : vs) {
1148 Exec(slot, v);
1149 }
1150 }
1151
1152 void Initialize() { /* noop */}
1153
 // Combines the per-slot sums with a Kahan summation and adds the total to *fResultSum (note `+=`: the value
 // already stored in the result is kept).
 // NOTE(review): the declarations of `sum`, `compensation`, `y` and `t` are not visible in this listing.
1154 void Finalize()
1155 {
1160 for (auto &m : fSums) {
1161 // Kahan Sum:
1162 y = m - compensation;
1163 t = sum + y;
1164 compensation = (t - sum) - y;
1165 sum = t;
1166 }
1167 *fResultSum += sum;
1168 }
1169
1170 // Helper functions for RMergeableValue
 // Wraps the current value of *fResultSum in an RMergeableSum.
1171 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1172 {
1173 return std::make_unique<RMergeableSum<ResultType>>(*fResultSum);
1174 }
1175
 // Gives access to the running sum of `slot`.
1176 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
1177
1178 std::string GetActionName() { return "Sum"; }
1179
 // `newResult` must point to a std::shared_ptr<ResultType>. The pointed-to value is overwritten with the neutral
 // element of the sum (the int argument -1 selects the `v - v` overload of NeutralElement when it is viable)
 // and becomes the result of a new helper with the same number of slots as this one.
1180 SumHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
1181 {
1182 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1183 *result = NeutralElement(*result, -1);
1184 return SumHelper(result, fSums.size());
1185 }
1186};
1187
// Action helper for the mean of the values it is given. Per slot it keeps a count of processed values and a
// Kahan-compensated sum. The constructor, the scalar Exec, Finalize and PartialUpdate are only declared here.
1188class R__CLING_PTRCHECK(off) MeanHelper : public RActionImpl<MeanHelper> {
 // Final result, shared with the caller that constructed the helper.
1189 std::shared_ptr<double> fResultMean;
 // Number of values processed by each slot.
1190 std::vector<ULong64_t> fCounts;
 // Running sum of each slot.
1191 std::vector<double> fSums;
1192 std::vector<double> fPartialMeans;
 // Kahan compensation term of each slot.
1193 std::vector<double> fCompensations;
1194
1195public:
1196 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1197 MeanHelper(MeanHelper &&) = default;
1198 MeanHelper(const MeanHelper &) = delete;
1199 void InitTask(TTreeReader *, unsigned int) {}
1200 void Exec(unsigned int slot, double v);
1201
 // Range overload: for every element of `vs`, increment the count of `slot` and add the element to the slot's
 // running sum with Kahan compensation.
 // NOTE(review): the template header introducing `T` is not visible in this listing -- confirm its constraints.
1203 void Exec(unsigned int slot, const T &vs)
1204 {
1205 for (auto &&v : vs) {
1206
1207 fCounts[slot]++;
1208 // Kahan Sum:
1209 double y = v - fCompensations[slot];
1210 double t = fSums[slot] + y;
1211 fCompensations[slot] = (t - fSums[slot]) - y;
1212 fSums[slot] = t;
1213 }
1214 }
1215
1216 void Initialize() { /* noop */}
1217
1218 void Finalize();
1219
1220 // Helper functions for RMergeableValue
 // Builds an RMergeableMean from the current *fResultMean and the total number of values over all slots.
1221 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1222 {
1223 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1224 return std::make_unique<RMergeableMean>(*fResultMean, counts);
1225 }
1226
1227 double &PartialUpdate(unsigned int slot);
1228
1229 std::string GetActionName() { return "Mean"; }
1230
 // `newResult` must point to a std::shared_ptr<double>; it becomes the result of a new helper with the same
 // number of slots as this one.
1231 MeanHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
1232 {
1233 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1234 return MeanHelper(result, fSums.size());
1235 }
1236};
1237
// Action helper for the standard deviation of the values it is given. Per slot it keeps a count, a mean and a
// squared distance from the mean. The constructor, the scalar Exec and Finalize are only declared here.
1238class R__CLING_PTRCHECK(off) StdDevHelper : public RActionImpl<StdDevHelper> {
1239 // Number of subsets of data
1240 unsigned int fNSlots;
 // Final result, shared with the caller that constructed the helper.
1241 std::shared_ptr<double> fResultStdDev;
1242 // Number of elements for each slot
1243 std::vector<ULong64_t> fCounts;
1244 // Mean of each slot
1245 std::vector<double> fMeans;
1246 // Squared distance from the mean
1247 std::vector<double> fDistancesfromMean;
1248
1249public:
1250 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1251 StdDevHelper(StdDevHelper &&) = default;
1252 StdDevHelper(const StdDevHelper &) = delete;
1253 void InitTask(TTreeReader *, unsigned int) {}
1254 void Exec(unsigned int slot, double v);
1255
 // Range overload: feed every element of `vs` to the scalar overload.
 // NOTE(review): the template header introducing `T` is not visible in this listing -- confirm its constraints.
1257 void Exec(unsigned int slot, const T &vs)
1258 {
1259 for (auto &&v : vs) {
1260 Exec(slot, v);
1261 }
1262 }
1263
1264 void Initialize() { /* noop */}
1265
1266 void Finalize();
1267
1268 // Helper functions for RMergeableValue
 // Builds an RMergeableStdDev from the current *fResultStdDev, the total number of values over all slots and
 // the overall mean, computed as the count-weighted average of the per-slot means.
 // NOTE(review): if no value was processed, `counts` is 0 and the division below is 0./0. -- confirm that
 // callers tolerate the resulting value.
1269 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1270 {
1271 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1272 const Double_t mean =
1273 std::inner_product(fMeans.begin(), fMeans.end(), fCounts.begin(), 0.) / static_cast<Double_t>(counts);
1274 return std::make_unique<RMergeableStdDev>(*fResultStdDev, counts, mean);
1275 }
1276
1277 std::string GetActionName() { return "StdDev"; }
1278
 // `newResult` must point to a std::shared_ptr<double>; it becomes the result of a new helper with the same
 // number of slots as this one.
1279 StdDevHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
1280 {
1281 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1282 return StdDevHelper(result, fCounts.size());
1283 }
1284};
1285
// Action helper that forwards the column values of up to a fixed number of entries to a display object and
// then asks the previous node of the computation graph to stop processing.
1286template <typename PrevNodeType>
1287class R__CLING_PTRCHECK(off) DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
1288private:
 // NOTE(review): the alias `Display_t` is used below but its declaration is not visible in this listing.
 // Object receiving the rows (see AddRow in Exec).
1290 std::shared_ptr<Display_t> fDisplayerHelper;
 // Node of the computation graph that is told to stop once enough rows were collected.
1291 std::shared_ptr<PrevNodeType> fPrevNode;
 // Number of entries that still have to be added; decremented by Exec.
1292 size_t fEntriesToProcess;
1293
1294public:
 // `nRows` is the number of entries to hand over to `d` before `prevNode` is asked to stop.
1295 DisplayHelper(size_t nRows, const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
1296 : fDisplayerHelper(d), fPrevNode(prevNode), fEntriesToProcess(nRows)
1297 {
1298 }
1299 DisplayHelper(DisplayHelper &&) = default;
1300 DisplayHelper(const DisplayHelper &) = delete;
1301 void InitTask(TTreeReader *, unsigned int) {}
1302
 // Adds one row made of `columns` unless the requested number of entries was already reached. The slot
 // argument is ignored.
1303 template <typename... Columns>
1304 void Exec(unsigned int, Columns &... columns)
1305 {
1306 if (fEntriesToProcess == 0)
1307 return;
1308
1309 fDisplayerHelper->AddRow(columns...);
1310 --fEntriesToProcess;
1311
1312 if (fEntriesToProcess == 0) {
1313 // No more entries to process. Send a one-time signal that this node
1314 // of the graph is done. It is important that the 'StopProcessing'
1315 // method is only called once from this helper, otherwise it would seem
1316 // like more than one operation has completed its work.
1317 fPrevNode->StopProcessing();
1318 }
1319 }
1320
1321 void Initialize() {}
1322
1323 void Finalize() {}
1324
1325 std::string GetActionName() { return "Display"; }
1326};
1327
1328template <typename T>
1329void *GetData(ROOT::VecOps::RVec<T> &v)
1330{
1331 return v.data();
1332}
1333
/// Generic overload of GetData, accepting an lvalue of any type `T`.
///
/// The argument is ignored and a null pointer is always returned, i.e. no data buffer is reported for it.
/// The overload taking a `ROOT::VecOps::RVec<T> &` is more specialised and is therefore preferred for RVecs.
///
/// \return Always `nullptr`.
template <typename T>
void *GetData(T & /*v*/)
{
   return nullptr;
}
1339
/// Create the output branch `name` in `outputTree` for a column that is not an array, or update its address if
/// `outputBranches` already knows a branch with that name.
///
/// \param[in] inputTree Input tree, may be null. If set, the branch `inName` is looked up in it.
/// \param[in] outputTree Tree in which the new branch is created.
/// \param[in] inName Name of the input branch.
/// \param[in] name Name of the output branch.
/// \param[out] branch Set to nullptr when a new branch is created; left untouched otherwise.
/// \param[out] branchAddress Set to `address` when the address of an existing output branch is updated from
///             `address`, set to nullptr when a new branch is created.
/// \param[in] address Address of the value to be written.
/// \param[in] outputBranches Set of the output branches created so far.
/// \param[in] basketSize If positive, used as buffer size of newly created branches.
1340template <typename T>
1341void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &name,
1342 TBranch *&branch, void *&branchAddress, T *address, RBranchSet &outputBranches,
1343 bool /*isDefine*/, int basketSize)
1344{
1345 static TClassRef TBOClRef("TBranchObject");
1346
1347 TBranch *inputBranch = nullptr;
1348 if (inputTree) {
1349 inputBranch = inputTree->GetBranch(inName.c_str());
1350 if (!inputBranch) // try harder
1351 inputBranch = inputTree->FindBranch(inName.c_str());
1352 }
1353
1354 auto *outputBranch = outputBranches.Get(name);
1355 if (outputBranch) {
1356 // the output branch was already created, we just need to (re)set its address
1357 if (inputBranch && inputBranch->IsA() == TBOClRef) {
 // input is a TBranchObject: reuse the address of the input branch, as a pointer to pointer
1358 outputBranch->SetAddress(reinterpret_cast<T **>(inputBranch->GetAddress()));
1359 } else if (outputBranch->IsA() != TBranch::Class()) {
 // the output branch is of a class other than plain TBranch: it is given the address of the pointer
1360 branchAddress = address;
1361 outputBranch->SetAddress(&branchAddress);
1362 } else {
1363 outputBranch->SetAddress(address);
1364 branchAddress = address;
1365 }
1366 return;
1367 }
1368
1369 if (inputBranch) {
1370 // Respect the original bufsize and splitlevel arguments
1371 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
1372 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
1373 // TObject branch, see https://bit.ly/2EjLMId ).
1374 // A user-provided basket size value takes precedence.
1375 const auto bufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
1376 const auto splitLevel = inputBranch->GetSplitLevel();
1377
1378 if (inputBranch->IsA() == TBOClRef) {
1379 // Need to pass a pointer to pointer
1380 outputBranch =
1381 outputTree.Branch(name.c_str(), reinterpret_cast<T **>(inputBranch->GetAddress()), bufSize, splitLevel);
1382 } else {
1383 outputBranch = outputTree.Branch(name.c_str(), address, bufSize, splitLevel);
1384 }
1385 } else {
1386 // Set Custom basket size for new branches.
 // `inputBranch` is null on this path, so the fallback of 32000 applies whenever basketSize is not positive.
1387 const auto buffSize = (basketSize > 0) ? basketSize : (inputBranch ? inputBranch->GetBasketSize() : 32000);
1388 outputBranch = outputTree.Branch(name.c_str(), address, buffSize);
1389 }
 // NOTE(review): no statement adding `outputBranch` to `outputBranches` is visible here, although the lookup at
 // the top of the function relies on it -- one line appears to be absent from this listing, confirm.
1391 // This is not an array branch, so we don't register the address of the output branch here
1392 branch = nullptr;
1393 branchAddress = nullptr;
1394}
1395
1396/// Helper function for SnapshotTTreeHelper and SnapshotTTreeHelperMT. It creates new branches for the output TTree of a
1397/// Snapshot. This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1398/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch
1399/// title
1400/// 2. RVecs coming from a custom column or the input file/data-source
1401/// 3. vectors coming from ROOT files that are being read as RVecs
1402/// 4. TClonesArray
1403///
1404/// In case of 1., we keep aside the pointer to the branch and the pointer to the input value (in `branch` and
1405/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
///
/// If `outputBranches` already contains a branch called `outName`, only its address/object is updated.
// NOTE(review): part of the parameter list (the line between `outName` and `basketSize`) is not visible in this
// listing; the body uses `branchAddress`, `ab`, `outputBranches` and `isDefine` from it.
1406template <typename T>
1407void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName,
1409 int basketSize)
1410{
1411 TBranch *inputBranch = nullptr;
1412 if (inputTree) {
1413 inputBranch = inputTree->GetBranch(inName.c_str());
1414 if (!inputBranch) // try harder
1415 inputBranch = inputTree->FindBranch(inName.c_str());
1416 }
1417 auto *outputBranch = outputBranches.Get(outName);
1418
1419 // if no backing input branch, we must write out an RVec
1420 bool mustWriteRVec = (inputBranch == nullptr || isDefine);
1421 // otherwise, if input branch is TClonesArray, must write out an RVec
1422 if (!mustWriteRVec && std::string_view(inputBranch->GetClassName()) == "TClonesArray") {
1423 mustWriteRVec = true;
1424 Warning("Snapshot",
1425 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1426 "be written out as a RVec instead of a TClonesArray. Specify that the type of the branch is "
1427 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.",
1428 inName.c_str());
1429 }
1430 // otherwise, if input branch is a std::vector or RVec, must write out an RVec
1431 if (!mustWriteRVec) {
1432 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
1433 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
1434 mustWriteRVec = true;
1435 }
1436
1437 if (mustWriteRVec) {
1438 // Treat:
1439 // 2. RVec coming from a custom column or a source
1440 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1441 // 4. TClonesArray written out as RVec<T>
1442 if (outputBranch) {
1443 // needs to be SetObject (not SetAddress) to mimic what happens when this TBranchElement is constructed
1444 outputBranch->SetObject(ab);
1445 } else {
1446 // Set Custom basket size for new branches if specified, otherwise get basket size from input branches
 // (32000 is used when there is no input branch either)
1447 const auto buffSize = (basketSize > 0) ? basketSize : (inputBranch ? inputBranch->GetBasketSize() : 32000);
1448 auto *b = outputTree.Branch(outName.c_str(), ab, buffSize);
1449 outputBranches.Insert(outName, b);
1450 }
1451 return;
1452 }
1453
1454 // else this must be a C-array, aka case 1.
1455 auto dataPtr = ab->data();
1456
1457 if (outputBranch) {
 // the output branch exists already: only refresh the address it reads from
1458 if (outputBranch->IsA() != TBranch::Class()) {
 // NOTE(review): one statement of this branch is not visible in this listing.
1460 outputBranch->SetAddress(&branchAddress);
1461 } else {
1462 outputBranch->SetAddress(dataPtr);
1463 }
1464 } else {
1465 // must construct the leaflist for the output branch and create the branch in the output tree
1466 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1467 const auto bname = leaf->GetName();
1468 auto *sizeLeaf = leaf->GetLeafCount();
 // array size as it appears in the leaflist: the name of the count leaf if there is one, otherwise the static
 // length of the leaf
1469 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
1470
1471 if (sizeLeaf && !outputBranches.Get(sizeLeafName)) {
1472 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
1473 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
1474 // branch to be successful. So we create the size leaf here.
1475 const auto sizeTypeStr = TypeName2ROOTTypeName(sizeLeaf->GetTypeName());
1476 // A positive user-provided basket size takes precedence, otherwise use the basket size of the input size branch.
1477 const auto sizeBufSize = (basketSize > 0) ? basketSize : sizeLeaf->GetBranch()->GetBasketSize();
1478 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName`
1479 auto *sizeBranch = outputTree.Branch(sizeLeafName.c_str(), (void *)nullptr,
1480 (sizeLeafName + '/' + sizeTypeStr).c_str(), sizeBufSize);
 // NOTE(review): the statement that uses `sizeBranch` is not visible in this listing.
1482 }
1483
1484 const auto btype = leaf->GetTypeName();
1485 const auto rootbtype = TypeName2ROOTTypeName(btype);
 // a blank type character is treated as "no ROOT type code available for this leaf type"
1486 if (rootbtype == ' ') {
1487 Warning("Snapshot",
1488 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. This "
1489 "column will not be written out.",
1490 bname);
1491 } else {
1492 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
1493 // A positive user-provided basket size takes precedence, otherwise use the basket size of the input branch.
1494 const auto branchBufSize = (basketSize > 0) ? basketSize : inputBranch->GetBasketSize();
1495 outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str(), branchBufSize);
1496 outputBranch->SetTitle(inputBranch->GetTitle());
 // NOTE(review): two lines are not visible here in this listing.
 // remember the address of the array data that the output branch currently reads from
1499 branchAddress = ab->data();
1500 }
1501 }
1502}
1503
// NOTE(review): the two lines below are the tail of a function declaration whose first line is not visible in
// this listing. The visible parameters are the input and output branch names, the type_info of the column type
// and a basket size.
1505 const std::string &inputBranchName, const std::string &outputBranchName,
1506 const std::type_info &typeInfo, int basketSize);
1507
1508/// Ensure that the TTree with the resulting snapshot can be written to the target TFile. This means checking that the
1509/// TFile can be opened in the mode specified in `opts`, deleting any existing TTrees in case
1510/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
///
/// \param[in] opts Snapshot options to validate the output against.
/// \param[in] treeName Name of the output TTree.
/// \param[in] fileName Name of the output file.
1511void EnsureValidSnapshotTTreeOutput(const RSnapshotOptions &opts, const std::string &treeName,
1512 const std::string &fileName);
1513
1514/// Helper object for a single-thread TTree-based Snapshot action
///
/// Initialize() opens the output file and creates the output tree, Exec() creates or updates the output branches
/// and fills the tree once per call, and Finalize() flushes the tree, closes the file and hands the written tree
/// to the output loop manager as a new data source.
1515template <typename... ColTypes>
1516class R__CLING_PTRCHECK(off) SnapshotTTreeHelper : public RActionImpl<SnapshotTTreeHelper<ColTypes...>> {
1517 std::string fFileName;
1518 std::string fDirName;
1519 std::string fTreeName;
1520 RSnapshotOptions fOptions;
1521 std::unique_ptr<TFile> fOutputFile;
1522 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
 // When true, the next Exec() call (re)creates/re-attaches the output branches via SetBranches().
1523 bool fBranchAddressesNeedReset{true};
1524 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1525 ColumnNames_t fOutputBranchNames;
1526 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1527 // TODO we might be able to unify fBranches, fBranchAddresses and fOutputBranches
1528 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1529 std::vector<void *> fBranchAddresses; // Addresses of objects associated to output branches
 // NOTE(review): fOutputBranches, fInputLoopManager and fOutputLoopManager are used below but their declarations
 // are not visible in this listing.
 // One flag per column, forwarded to SetBranchesHelper as its `isDefine` argument.
1531 std::vector<bool> fIsDefine;
1534
1535public:
1536 using ColumnTypes_t = TypeList<ColTypes...>;
 // Stores the output location and options, sizes the per-column bookkeeping vectors after `vbnames` and checks
 // up front that the output can be written (EnsureValidSnapshotTTreeOutput).
 // NOTE(review): parts of the parameter list and of the member initialiser list are not visible in this listing.
1537 SnapshotTTreeHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1538 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
1541 : fFileName(filename),
1542 fDirName(dirname),
1543 fTreeName(treename),
1544 fOptions(options),
1547 fBranches(vbnames.size(), nullptr),
1548 fBranchAddresses(vbnames.size(), nullptr),
1549 fIsDefine(std::move(isDefine)),
1552 {
1553 EnsureValidSnapshotTTreeOutput(fOptions, fTreeName, fFileName);
1554 }
1555
1556 SnapshotTTreeHelper(const SnapshotTTreeHelper &) = delete;
 // NOTE(review): the declarator of the following body is not visible in this listing. The body warns when a lazy
 // Snapshot was booked but the output file was never opened.
1559 {
1560 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy) {
1561 const auto fileOpenMode = [&]() {
1562 TString checkupdate = fOptions.fMode;
1563 checkupdate.ToLower();
1564 return checkupdate == "update" ? "updated" : "created";
1565 }();
1566 Warning("Snapshot",
1567 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
1568 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
1569 "its result in a variable and for example calling the GetValue() method on it.",
1570 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
1571 }
1572 }
1573
1574 void InitTask(TTreeReader * /*treeReader*/, unsigned int /* slot */)
1575 {
1576 // We ask the input RLoopManager if it has a TTree. We cannot rely on getting this information when constructing
1577 // this action helper, since the TTree might change e.g. when ChangeSpec is called in-between distributed tasks.
1578 fInputTree = fInputLoopManager->GetTree();
1580 }
1581
 // Either refreshes the addresses of C-array branches or (re)creates all output branches, then fills the output
 // tree with the current values.
 // NOTE(review): the condition selecting between the two cases, and one statement of the else branch, are not
 // visible in this listing.
1582 void Exec(unsigned int /* slot */, ColTypes &... values)
1583 {
1584 using ind_t = std::index_sequence_for<ColTypes...>;
1586 UpdateCArraysPtrs(values..., ind_t{});
1587 } else {
1588 SetBranches(values..., ind_t{});
1590 }
1591 fOutputTree->Fill();
1592 }
1593
1594 template <std::size_t... S>
1595 void UpdateCArraysPtrs(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1596 {
1597 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1598 // associated to those are re-allocated. As a result the value of the pointer can change, thereby
1599 // leaving an invalid pointer associated to the branch of the output tree.
1600 // With this code, we set the value of the pointer in the output branch anew when needed.
1601 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1602 // we need an int for the expander list.
1603 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1604 ? fBranches[S]->SetAddress(GetData(values)),
1605 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1606 0};
1607 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1608 }
1609
1610 template <std::size_t... S>
1611 void SetBranches(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1612 {
1613 // create branches in output tree
 // NOTE(review): the first line of the expression inside the expander list is not visible in this listing.
1614 int expander[] = {
1616 fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S], fOptions.fBasketSize),
1617 0)...,
1618 0};
1619 fOutputBranches.AssertNoNullBranchAddresses();
1620 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1621 }
1622
1623 template <std::size_t... S>
1624 void SetEmptyBranches(TTree *inputTree, TTree &outputTree, std::index_sequence<S...>)
1625 {
1627 // We use the expander trick rather than a fold expression to avoid incurring in the bracket depth limit of clang
 // NOTE(review): the declaration of `expander` and the beginning of the call are not visible in this listing.
1629 fOutputBranchNames[S], typeid(ColTypes), fOptions.fBasketSize),
1630 0)...,
1631 0};
1632 (void)expander;
1633 }
1634
 // Opens the output file in the mode requested by the options, creates the output directory if one was
 // requested, and creates the output tree in it. Throws std::runtime_error if the file cannot be opened.
1635 void Initialize()
1636 {
1637 fOutputFile.reset(
1638 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1640 if(!fOutputFile)
1641 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1642
1643 TDirectory *outputDir = fOutputFile.get();
1644 if (!fDirName.empty()) {
1645 TString checkupdate = fOptions.fMode;
1646 checkupdate.ToLower();
1647 if (checkupdate == "update")
1648 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
1649 else
1650 outputDir = fOutputFile->mkdir(fDirName.c_str());
1651 }
1652
1653 fOutputTree =
1654 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
1655
 // a zero fAutoFlush leaves the auto-flush setting of the new tree untouched
1656 if (fOptions.fAutoFlush)
1657 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1658 }
1659
 // Flushes the output tree, destroys it, closes the output file and installs an RTTreeDS reading the written
 // tree as data source of the output loop manager.
1660 void Finalize()
1661 {
1662 assert(fOutputTree != nullptr);
1663 assert(fOutputFile != nullptr);
1664
1665 // There were no entries to fill the TTree with (either the input TTree was empty or no event passed after
1666 // filtering). We have already created an empty TTree, now also create the branches to preserve the schema
1667 if (fOutputTree->GetEntries() == 0) {
1668 using ind_t = std::index_sequence_for<ColTypes...>;
 // NOTE(review): the statement using `ind_t` is not visible in this listing.
1670 }
1671 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1672 fOutputTree->AutoSave("flushbaskets");
1673 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1674 fOutputTree.reset();
1675 fOutputFile->Close();
1676
1677 // Now connect the data source to the loop manager so it can be used for further processing
1678 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
1679 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
1680 }
1681
1682 std::string GetActionName() { return "Snapshot"; }
1683
 // The returned callback captures `this` and raises fBranchAddressesNeedReset, so that the next Exec() call
 // goes through SetBranches() again.
1684 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1685 {
1686 return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
1687 }
1688
1689 /**
1690 * @brief Create a new SnapshotTTreeHelper with a different output file name
1691 *
1692 * @param newName A type-erased string with the output file name
1693 * @return SnapshotTTreeHelper
1694 *
1695 * This MakeNew implementation is tied to the cloning feature of actions
1696 * of the computation graph. In particular, cloning a Snapshot node usually
1697 * also involves changing the name of the output file, otherwise the cloned
1698 * Snapshot would overwrite the same file.
1699 */
1700 SnapshotTTreeHelper MakeNew(void *newName, std::string_view /*variation*/ = "nominal")
1701 {
 // `newName` must point to a std::string; it is copied before use
1702 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
 // NOTE(review): the beginning and the end of the return statement are not visible in this listing.
1704 fDirName,
1705 fTreeName,
1708 fOptions,
1709 std::vector<bool>(fIsDefine),
1712 }
1713};
1714
1715/// Helper object for a multi-thread TTree-based Snapshot action
1716template <typename... ColTypes>
1717class R__CLING_PTRCHECK(off) SnapshotTTreeHelperMT : public RActionImpl<SnapshotTTreeHelperMT<ColTypes...>> {
1718 unsigned int fNSlots;
1719 std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1720 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
1721 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1722 std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
1723 std::string fFileName; // name of the output file name
1724 std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1725 std::string fTreeName; // name of output tree
1726 RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1727 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1728 ColumnNames_t fOutputBranchNames;
1729 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1730 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1731 std::vector<std::vector<TBranch *>> fBranches;
1732 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1733 std::vector<std::vector<void *>> fBranchAddresses;
1734 std::vector<RBranchSet> fOutputBranches;
1735 std::vector<bool> fIsDefine;
1738 TFile *fOutputFile; // Non-owning view on the output file
1739
1740public:
1741 using ColumnTypes_t = TypeList<ColTypes...>;
1742
1743 SnapshotTTreeHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1744 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1745 const RSnapshotOptions &options, std::vector<bool> &&isDefine,
1747 : fNSlots(nSlots),
1748 fOutputFiles(fNSlots),
1749 fOutputTrees(fNSlots),
1750 fBranchAddressesNeedReset(fNSlots, 1),
1751 fFileName(filename),
1752 fDirName(dirname),
1753 fTreeName(treename),
1754 fOptions(options),
1757 fInputTrees(fNSlots),
1758 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1759 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr)),
1760 fOutputBranches(fNSlots),
1761 fIsDefine(std::move(isDefine)),
1764 {
1765 EnsureValidSnapshotTTreeOutput(fOptions, fTreeName, fFileName);
1766 }
1770 {
1771 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy && !fOutputFiles.empty() &&
1772 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */) {
1773 const auto fileOpenMode = [&]() {
1774 TString checkupdate = fOptions.fMode;
1775 checkupdate.ToLower();
1776 return checkupdate == "update" ? "updated" : "created";
1777 }();
1778 Warning("Snapshot",
1779 "A lazy Snapshot action was booked but never triggered. The tree '%s' in output file '%s' was not %s. "
1780 "In case it was desired instead, remember to trigger the Snapshot operation, by storing "
1781 "its result in a variable and for example calling the GetValue() method on it.",
1782 fTreeName.c_str(), fFileName.c_str(), fileOpenMode);
1783 }
1784 }
1785
1786 void InitTask(TTreeReader *r, unsigned int slot)
1787 {
1788 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1789 if (!fOutputFiles[slot]) {
1790 // first time this thread executes something, let's create a TBufferMerger output directory
1791 fOutputFiles[slot] = fMerger->GetFile();
1792 }
1794 if (!fDirName.empty()) {
1795 // call returnExistingDirectory=true since MT can end up making this call multiple times
1796 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1797 }
1798 // re-create output tree as we need to create its branches again, with new input variables
1799 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1801 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1803 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1804 fOutputTrees[slot]->SetImplicitMT(false);
1805 if (fOptions.fAutoFlush)
1806 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1807 if (r) {
1808 // We could be getting a task-local TTreeReader from the TTreeProcessorMT.
1809 fInputTrees[slot] = r->GetTree();
1810 } else {
1811 fInputTrees[slot] = fInputLoopManager->GetTree();
1812 }
1813 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
1814 }
1815
1816 void FinalizeTask(unsigned int slot)
1817 {
1818 if (fOutputTrees[slot]->GetEntries() > 0)
1819 fOutputFiles[slot]->Write();
1820 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1821 fOutputTrees[slot].reset(nullptr);
1822 fOutputBranches[slot].Clear();
1823 }
1824
1825 void Exec(unsigned int slot, ColTypes &... values)
1826 {
1827 using ind_t = std::index_sequence_for<ColTypes...>;
1828 if (fBranchAddressesNeedReset[slot] == 0) {
1829 UpdateCArraysPtrs(slot, values..., ind_t{});
1830 } else {
1831 SetBranches(slot, values..., ind_t{});
1833 }
1834 fOutputTrees[slot]->Fill();
1835 auto entries = fOutputTrees[slot]->GetEntries();
1836 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1837 if ((autoFlush > 0) && (entries % autoFlush == 0))
1838 fOutputFiles[slot]->Write();
1839 }
1840
/// Refresh the addresses of the output branches of one slot when the data pointer of a column changed.
///
/// For every column index S: if the branch pointer fBranches[slot][S] is non-null and the cached
/// address fBranchAddresses[slot][S] differs from GetData(values), the branch address is set to the
/// new pointer and the cache is updated. Nothing is done for the other columns.
1841 template <std::size_t... S>
1842 void UpdateCArraysPtrs(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1843 {
1844 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1845 // associated to those are re-allocated. As a result the value of the pointer can change, thereby
1846 // leaving an invalid pointer associated to the branch of the output tree.
1847 // With this code, we set the value of the pointer in the output branch anew when needed.
1848 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1849 // we need an int for the expander list.
1850 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1851 ? fBranches[slot][S]->SetAddress(GetData(values)),
1852 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1853 0};
1854 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1855 (void)slot; // Also "slot" might be unused, in case "values" is empty
1856 }
1857
1858 template <std::size_t... S>
1859 void SetBranches(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1860 {
1861 // hack to call TTree::Branch on all variadic template arguments
1862 int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1863 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S],
1864 &values, fOutputBranches[slot], fIsDefine[S], fOptions.fBasketSize),
1865 0)...,
1866 0};
1867 fOutputBranches[slot].AssertNoNullBranchAddresses();
1868 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1869 }
1870
/// Pack-expansion helper taking the input tree, the output tree and one index per column.
///
/// NOTE(review): this listing is incomplete. The embedded line numbers jump from 1873 to 1875 and from
/// 1875 to 1877, so the beginning of the `int expander[] = {(...` statement (including the name of the
/// per-column helper being called and its first arguments) is missing. Restore it from the upstream
/// file before compiling; presumably it creates one branch per column in `outputTree` - confirm.
1871 template <std::size_t... S>
1872 void SetEmptyBranches(TTree *inputTree, TTree &outputTree, std::index_sequence<S...>)
1873 {
1875 // We use the expander trick rather than a fold expression to avoid incurring in the bracket depth limit of clang
1877 fOutputBranchNames[S], typeid(ColTypes), fOptions.fBasketSize),
1878 0)...,
1879 0};
// The array is only a vehicle for the pack expansion; the cast silences unused-variable warnings.
1880 (void)expander;
1881 }
1882
1883 void Initialize()
1884 {
1886 auto outFile = std::unique_ptr<TFile>{
1887 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs)};
1888 if (!outFile)
1889 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1890 fOutputFile = outFile.get();
1891 fMerger = std::make_unique<ROOT::TBufferMerger>(std::move(outFile));
1892 }
1893
1894 void Finalize()
1895 {
1896 assert(std::any_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &ptr) { return ptr != nullptr; }));
1897
1898 for (auto &file : fOutputFiles) {
1899 if (file) {
1900 file->Write();
1901 file->Close();
1902 }
1903 }
1904
1905 // If there were no entries to fill the TTree with (either the input TTree was empty or no event passed after
1906 // filtering), create an empty TTree in the output file and create the branches to preserve the schema
1907 auto fullTreeName = fDirName.empty() ? fTreeName : fDirName + '/' + fTreeName;
1908 assert(fOutputFile && "Missing output file in Snapshot finalization.");
1909 if (!fOutputFile->Get(fullTreeName.c_str())) {
1910
1911 // First find in which directory we need to write the output TTree
1912 TDirectory *treeDirectory = fOutputFile;
1913 if (!fDirName.empty()) {
1914 treeDirectory = fOutputFile->mkdir(fDirName.c_str(), "", true);
1915 }
1917
1918 // Create the output TTree and create the user-requested branches
1919 auto outTree =
1920 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1921 using ind_t = std::index_sequence_for<ColTypes...>;
1923
1924 fOutputFile->Write();
1925 }
1926
1927 // flush all buffers to disk by destroying the TBufferMerger
1928 fOutputFiles.clear();
1929 fMerger.reset();
1930
1931 // Now connect the data source to the loop manager so it can be used for further processing
1932 fOutputLoopManager->SetDataSource(std::make_unique<ROOT::Internal::RDF::RTTreeDS>(fullTreeName, fFileName));
1933 }
1934
/// Name under which this action is reported.
std::string GetActionName()
{
   return std::string{"Snapshot"};
}
1936
1937 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1938 {
1939 return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
1940 }
1941
1942 /**
1943 * @brief Create a new SnapshotTTreeHelperMT with a different output file name
1944 *
1945 * @param newName A type-erased string with the output file name
1946 * @return SnapshotTTreeHelperMT
1947 *
1948 * This MakeNew implementation is tied to the cloning feature of actions
1949 * of the computation graph. In particular, cloning a Snapshot node usually
1950 * also involves changing the name of the output file, otherwise the cloned
1951 * Snapshot would overwrite the same file.
1952 */
// The second parameter (variation name) is accepted but not used by this helper.
1953 SnapshotTTreeHelperMT MakeNew(void *newName, std::string_view /*variation*/ = "nominal")
1954 {
// newName must point to a std::string: it is reinterpreted as such and copied.
1955 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
1956 return SnapshotTTreeHelperMT{fNSlots,
1957 finalName,
1958 fDirName,
1959 fTreeName,
// NOTE(review): embedded lines 1960-1961 are missing from this listing (presumably the input and
// output branch name lists) - restore from upstream.
1962 fOptions,
1963 std::vector<bool>(fIsDefine),
// NOTE(review): embedded lines 1964-1965 are missing from this listing (remaining constructor
// arguments and the closing "};" of the return statement) - restore from upstream.
1966 }
1967};
1968
1969/// Ensure that the RNTuple with the resulting snapshot can be written to the target TFile. This means checking that the
1970/// TFile can be opened in the mode specified in `opts`, deleting any existing RNTuples in case
1971/// `opts.fOverwriteIfExists = true`, or throwing an error otherwise.
1973 const std::string &fileName);
1974
1975/// Helper function to update the value of an RNTuple's field in the provided entry.
1976template <typename T>
1977void SetFieldsHelper(T &value, std::string_view fieldName, ROOT::REntry *entry)
1978{
1979 entry->BindRawPtr(fieldName, &value);
1980}
1981
1982/// Helper object for a single-thread RNTuple-based Snapshot action
1983template <typename... ColTypes>
1984class R__CLING_PTRCHECK(off) SnapshotRNTupleHelper : public RActionImpl<SnapshotRNTupleHelper<ColTypes...>> {
1985 std::string fFileName;
1986 std::string fDirName;
1987 std::string fNTupleName;
1988
1989 std::unique_ptr<TFile> fOutputFile{nullptr};
1990
1991 RSnapshotOptions fOptions;
1993 ColumnNames_t fInputFieldNames; // This contains the resolved aliases
1994 ColumnNames_t fOutputFieldNames;
1995 std::unique_ptr<ROOT::RNTupleWriter> fWriter{nullptr};
1996
1998
1999 std::vector<bool> fIsDefine;
2000
2001public:
2002 using ColumnTypes_t = TypeList<ColTypes...>;
2003 SnapshotRNTupleHelper(std::string_view filename, std::string_view dirname, std::string_view ntuplename,
2004 const ColumnNames_t &vfnames, const ColumnNames_t &fnames, const RSnapshotOptions &options,
2005 ROOT::Detail::RDF::RLoopManager *lm, std::vector<bool> &&isDefine)
2006 : fFileName(filename),
2007 fDirName(dirname),
2008 fNTupleName(ntuplename),
2009 fOptions(options),
2013 fIsDefine(std::move(isDefine))
2014 {
2015 EnsureValidSnapshotRNTupleOutput(fOptions, fNTupleName, fFileName);
2016 }
2017
2023 {
2024 if (!fNTupleName.empty() && !fOutputLoopManager->GetDataSource() && fOptions.fLazy)
2025 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
2026 }
2027
2028 void InitTask(TTreeReader *, unsigned int /* slot */) {}
2029
2030 void Exec(unsigned int /* slot */, ColTypes &...values)
2031 {
2032 using ind_t = std::index_sequence_for<ColTypes...>;
2033
2034 SetFields(values..., ind_t{});
2035 fWriter->Fill();
2036 }
2037
2038 template <std::size_t... S>
2039 void SetFields(ColTypes &...values, std::index_sequence<S...> /*dummy*/)
2040 {
2041 int expander[] = {(SetFieldsHelper(values, fOutputFieldNames[S], fOutputEntry), 0)..., 0};
2042 (void)expander; // avoid unused variable warnings for older compilers (gcc 14.1)
2043 }
2044
2045 void Initialize()
2046 {
2047 using ind_t = std::index_sequence_for<ColTypes...>;
2048
2049 auto model = ROOT::RNTupleModel::Create();
2050 MakeFields(*model, ind_t{});
2051 fOutputEntry = &model->GetDefaultEntry();
2052
2054 writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
2055
2056 fOutputFile.reset(TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
2057 if (!fOutputFile)
2058 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
2059
2060 TDirectory *outputDir = fOutputFile.get();
2061 if (!fDirName.empty()) {
2062 TString checkupdate = fOptions.fMode;
2063 checkupdate.ToLower();
2064 if (checkupdate == "update")
2065 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
2066 else
2067 outputDir = fOutputFile->mkdir(fDirName.c_str());
2068 }
2069
2070 fWriter = ROOT::RNTupleWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
2071 }
2072
2073 template <std::size_t... S>
2074 void MakeFields(ROOT::RNTupleModel &model, std::index_sequence<S...> /*dummy*/)
2075 {
2076 int expander[] = {(model.MakeField<ColTypes>(fOutputFieldNames[S]), 0)..., 0};
2077 (void)expander; // avoid unused variable warnings for older compilers (gcc 14.1)
2078 }
2079
2080 void Finalize()
2081 {
2082 fWriter.reset();
2083 // We can now set the data source of the loop manager for the RDataFrame that is returned by the Snapshot call.
2084 fOutputLoopManager->SetDataSource(
2085 std::make_unique<ROOT::RDF::RNTupleDS>(fDirName + "/" + fNTupleName, fFileName));
2086 }
2087
2088 std::string GetActionName() { return "Snapshot"; }
2089
2090 ROOT::RDF::SampleCallback_t GetSampleCallback() final
2091 {
2092 return [](unsigned int, const RSampleInfo &) mutable {};
2093 }
2094
2095 /**
2096 * @brief Create a new SnapshotRNTupleHelper with a different output file name
2097 *
2098 * @param newName A type-erased string with the output file name
2099 * @return SnapshotRNTupleHelper
2100 *
2101 * This MakeNew implementation is tied to the cloning feature of actions
2102 * of the computation graph. In particular, cloning a Snapshot node usually
2103 * also involves changing the name of the output file, otherwise the cloned
2104 * Snapshot would overwrite the same file.
2105 */
2107 {
2108 const std::string finalName = *reinterpret_cast<const std::string *>(newName);
2110 fNTupleName,
2113 fOptions,
2115 std::vector<bool>(fIsDefine)};
2116 }
2117};
2118
2119template <typename Acc, typename Merge, typename R, typename T, typename U,
2120 bool MustCopyAssign = std::is_same<R, U>::value>
2122 : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
2124 Merge fMerge;
2125 std::shared_ptr<U> fResult;
2127
2128public:
2129 using ColumnTypes_t = TypeList<T>;
2130
2131 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
2132 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
2133 {
2134 }
2135
2136 AggregateHelper(Acc &f, Merge &m, const std::shared_ptr<U> &result, const unsigned int nSlots)
2137 : fAggregate(f), fMerge(m), fResult(result), fAggregators(nSlots, *result)
2138 {
2139 }
2140
2141 AggregateHelper(AggregateHelper &&) = default;
2142 AggregateHelper(const AggregateHelper &) = delete;
2143
2144 void InitTask(TTreeReader *, unsigned int) {}
2145
2146 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<MustCopyAssign_, int> = 0>
2147 void Exec(unsigned int slot, const T &value)
2148 {
2150 }
2151
2152 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<!MustCopyAssign_, int> = 0>
2153 void Exec(unsigned int slot, const T &value)
2154 {
2156 }
2157
2158 void Initialize() { /* noop */}
2159
2161 bool MergeAll = std::is_same<void, MergeRet>::value>
2162 std::enable_if_t<MergeAll, void> Finalize()
2163 {
2164 fMerge(fAggregators);
2165 *fResult = fAggregators[0];
2166 }
2167
2169 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
2170 std::enable_if_t<MergeTwoByTwo, void> Finalize(...) // ... needed to let compiler distinguish overloads
2171 {
2172 for (const auto &acc : fAggregators)
2173 *fResult = fMerge(*fResult, acc);
2174 }
2175
2176 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
2177
2178 std::string GetActionName() { return "Aggregate"; }
2179
2180 AggregateHelper MakeNew(void *newResult, std::string_view /*variation*/ = "nominal")
2181 {
2182 auto &result = *static_cast<std::shared_ptr<U> *>(newResult);
2183 return AggregateHelper(fAggregate, fMerge, result, fAggregators.size());
2184 }
2185};
2186
2187} // end of NS RDF
2188} // end of NS Internal
2189} // end of NS ROOT
2190
2191/// \endcond
2192
2193#endif
PyObject * fCallable
Handle_t Display_t
Display handle.
Definition GuiTypes.h:27
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define g(i)
Definition RSha256.hxx:105
#define h(i)
Definition RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition RSha256.hxx:110
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
double Double_t
Definition RtypesCore.h:59
unsigned long long ULong64_t
Definition RtypesCore.h:70
#define X(type, name)
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:229
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
void operator=(const TProof &)
Int_t Exec(const char *cmd, ESlaves list, Bool_t plusMaster)
Long64_t Finalize(Int_t query=-1, Bool_t force=kFALSE)
TClass * IsA() const override
Definition TStringLong.h:20
TTime operator*(const TTime &t1, const TTime &t2)
Definition TTime.h:85
Base class for action helpers, see RInterface::Book() for more information.
The head node of a RDF computation graph.
This class is the textual representation of the content of a columnar dataset.
Definition RDisplay.hxx:65
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
The REntry is a collection of values in an ntuple corresponding to a complete row in the data set.
Definition REntry.hxx:54
The RNTupleModel encapsulates the schema of an RNTuple.
static std::unique_ptr< RNTupleModel > Create()
std::shared_ptr< T > MakeField(std::string_view name, std::string_view description="")
Creates a new field given a name or {name, description} pair and a corresponding, default-constructed...
Common user-tunable settings for storing RNTuples.
static std::unique_ptr< RNTupleWriter > Append(std::unique_ptr< ROOT::RNTupleModel > model, std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options=ROOT::RNTupleWriteOptions())
Throws an exception if the model is null.
const_iterator begin() const
const_iterator end() const
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1529
A TTree is a list of TBranches.
Definition TBranch.h:93
static TClass * Class()
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:29
Collection abstract base class.
Definition TCollection.h:65
TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE) override
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:131
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4131
Int_t Write(const char *name=nullptr, Int_t opt=0, Int_t bufsiz=0) override
Write memory objects to this file.
Definition TFile.cxx:2477
TGraph with asymmetric error bars.
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:698
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:59
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
A doubly linked list.
Definition TList.h:38
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
Basic string class.
Definition TString.h:139
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:46
A TTree represents a columnar dataset.
Definition TTree.h:79
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:262
RooCmdArg Columns(Int_t ncol)
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
#define H(x, y, z)
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.' with '_' in each of the given column names.
Definition RDFUtils.cxx:318
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:263
void EnsureValidSnapshotRNTupleOutput(const RSnapshotOptions &opts, const std::string &ntupleName, const std::string &fileName)
void EnsureValidSnapshotTTreeOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
constexpr std::size_t FindIdxTrue(const T &arr)
Definition Utils.hxx:233
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with an RDataFrame computation graph via e....
ROOT type_traits extensions.
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
RooArgSet S(Args_t &&... args)
Definition RooArgSet.h:200
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition tmvaglob.cxx:176
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
int fBasketSize
Set a custom basket size option.
bool fLazy
Do not start the event loop when Snapshot is called.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
TMarker m
Definition textangle.C:8
TLine l
Definition textangle.C:4
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345