Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ActionHelpers.hxx
Go to the documentation of this file.
1/**
2 \file ROOT/RDF/ActionHelpers.hxx
3 \ingroup dataframe
4 \author Enrico Guiraud, CERN
5 \author Danilo Piparo, CERN
6 \date 2016-12
7 \author Vincenzo Eduardo Padulano
8 \date 2020-06
9*/
10
11/*************************************************************************
12 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. *
13 * All rights reserved. *
14 * *
15 * For the licensing terms see $ROOTSYS/LICENSE. *
16 * For the list of contributors see $ROOTSYS/README/CREDITS. *
17 *************************************************************************/
18
19#ifndef ROOT_RDFOPERATIONS
20#define ROOT_RDFOPERATIONS
21
22#include "Compression.h"
23#include "ROOT/RStringView.hxx"
24#include "ROOT/RVec.hxx"
25#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
28#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/TypeTraits.hxx"
31#include "ROOT/RDF/RDisplay.hxx"
32#include "RtypesCore.h"
33#include "TBranch.h"
34#include "TClassEdit.h"
35#include "TClassRef.h"
36#include "TDirectory.h"
37#include "TError.h" // for R__ASSERT, Warning
38#include "TFile.h" // for SnapshotHelper
39#include "TH1.h"
40#include "TGraph.h"
41#include "TGraphAsymmErrors.h"
42#include "TLeaf.h"
43#include "TObject.h"
44#include "TTree.h"
45#include "TTreeReader.h" // for SnapshotHelper
46#include "TStatistic.h"
49
50#include <algorithm>
51#include <functional>
52#include <limits>
53#include <memory>
54#include <stdexcept>
55#include <string>
56#include <type_traits>
57#include <utility> // std::index_sequence
58#include <vector>
59#include <iomanip>
60#include <numeric> // std::accumulate in MeanHelper
61
62/// \cond HIDDEN_SYMBOLS
63
64namespace ROOT {
65namespace Internal {
66namespace RDF {
67using namespace ROOT::TypeTraits;
68using namespace ROOT::VecOps;
69using namespace ROOT::RDF;
70using namespace ROOT::Detail::RDF;
71
72using Hist_t = ::TH1D;
73
74class RBranchSet {
75 std::vector<TBranch *> fBranches;
76 std::vector<std::string> fNames;
77
78public:
79 TBranch *Get(const std::string &name) const
80 {
81 auto it = std::find(fNames.begin(), fNames.end(), name);
82 if (it == fNames.end())
83 return nullptr;
84 return fBranches[std::distance(fNames.begin(), it)];
85 }
86
87 void Insert(const std::string &name, TBranch *address)
88 {
89 if (address == nullptr) {
90 throw std::logic_error("Trying to insert a null branch address.");
91 }
92 if (std::find(fBranches.begin(), fBranches.end(), address) != fBranches.end()) {
93 throw std::logic_error("Trying to insert a branch address that's already present.");
94 }
95 if (std::find(fNames.begin(), fNames.end(), name) != fNames.end()) {
96 throw std::logic_error("Trying to insert a branch name that's already present.");
97 }
98 fNames.emplace_back(name);
99 fBranches.emplace_back(address);
100 }
101
102 void Clear()
103 {
104 fBranches.clear();
105 fNames.clear();
106 }
107
108 void AssertNoNullBranchAddresses()
109 {
110 std::vector<TBranch *> branchesWithNullAddress;
111 std::copy_if(fBranches.begin(), fBranches.end(), std::back_inserter(branchesWithNullAddress),
112 [](TBranch *b) { return b->GetAddress() == nullptr; });
113
114 if (branchesWithNullAddress.empty())
115 return;
116
117 // otherwise build error message and throw
118 std::vector<std::string> missingBranchNames;
119 std::transform(branchesWithNullAddress.begin(), branchesWithNullAddress.end(),
120 std::back_inserter(missingBranchNames), [](TBranch *b) { return b->GetName(); });
121 std::string msg = "RDataFrame::Snapshot:";
122 if (missingBranchNames.size() == 1) {
123 msg += " branch " + missingBranchNames[0] +
124 " is needed as it provides the size for one or more branches containing dynamically sized arrays, but "
125 "it is";
126 } else {
127 msg += " branches ";
128 for (const auto &bName : missingBranchNames)
129 msg += bName + ", ";
130 msg.resize(msg.size() - 2); // remove last ", "
131 msg +=
132 " are needed as they provide the size of other branches containing dynamically sized arrays, but they are";
133 }
134 msg += " not part of the set of branches that are being written out.";
135 throw std::runtime_error(msg);
136 }
137};
138
/// The container type for each thread's partial result in an action helper.
///
/// std::vector<bool> must not be instantiated, as it makes it impossible to return a reference to one of the
/// thread-local results: for bool a std::deque is used instead. Having one common alias for the container type
/// also makes it easy to swap the underlying container, e.g. should false sharing of the thread-local results
/// turn out to be a problem.
template <typename T>
using Results = typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
145
146template <typename F>
147class R__CLING_PTRCHECK(off) ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
148 F fCallable;
149
150public:
152 ForeachSlotHelper(F &&f) : fCallable(f) {}
153 ForeachSlotHelper(ForeachSlotHelper &&) = default;
154 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
155
156 void InitTask(TTreeReader *, unsigned int) {}
157
158 template <typename... Args>
159 void Exec(unsigned int slot, Args &&... args)
160 {
161 // check that the decayed types of Args are the same as the branch types
162 static_assert(std::is_same<TypeList<std::decay_t<Args>...>, ColumnTypes_t>::value, "");
163 fCallable(slot, std::forward<Args>(args)...);
164 }
165
166 void Initialize() { /* noop */}
167
168 void Finalize() { /* noop */}
169
170 std::string GetActionName() { return "ForeachSlot"; }
171};
172
173class R__CLING_PTRCHECK(off) CountHelper : public RActionImpl<CountHelper> {
174 std::shared_ptr<ULong64_t> fResultCount;
175 Results<ULong64_t> fCounts;
176
177public:
178 using ColumnTypes_t = TypeList<>;
179 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
180 CountHelper(CountHelper &&) = default;
181 CountHelper(const CountHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot);
184 void Initialize() { /* noop */}
185 void Finalize();
186
187 // Helper functions for RMergeableValue
188 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
189 {
190 return std::make_unique<RMergeableCount>(*fResultCount);
191 }
192
193 ULong64_t &PartialUpdate(unsigned int slot);
194
195 std::string GetActionName() { return "Count"; }
196
197 CountHelper MakeNew(void *newResult)
198 {
199 auto &result = *static_cast<std::shared_ptr<ULong64_t> *>(newResult);
200 return CountHelper(result, fCounts.size());
201 }
202};
203
204template <typename RNode_t>
205class R__CLING_PTRCHECK(off) ReportHelper : public RActionImpl<ReportHelper<RNode_t>> {
206 std::shared_ptr<RCutFlowReport> fReport;
207 /// Non-owning pointer, never null. As usual, the node is owned by its children nodes (and therefore indirectly by
208 /// the RAction corresponding to this action helper).
209 RNode_t *fNode;
210 bool fReturnEmptyReport;
211
212public:
213 using ColumnTypes_t = TypeList<>;
214 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, RNode_t *node, bool emptyRep)
215 : fReport(report), fNode(node), fReturnEmptyReport(emptyRep){};
216 ReportHelper(ReportHelper &&) = default;
217 ReportHelper(const ReportHelper &) = delete;
218 void InitTask(TTreeReader *, unsigned int) {}
219 void Exec(unsigned int /* slot */) {}
220 void Initialize() { /* noop */}
221 void Finalize()
222 {
223 if (!fReturnEmptyReport)
224 fNode->Report(*fReport);
225 }
226
227 std::string GetActionName() { return "Report"; }
228
229 // TODO implement MakeNew. Requires some smartness in passing the appropriate previous node.
230};
231
232/// This helper fills TH1Ds for which no axes were specified by buffering the fill values to pick good axes limits.
233///
234/// TH1Ds have an automatic mechanism to pick good limits based on the first N entries they were filled with, but
235/// that does not work in multi-thread event loops as it might yield histograms with incompatible binning in each
236/// thread, making it impossible to merge the per-thread results.
237/// Instead, this helper delays the decision on the axes limits until all threads have done processing, synchronizing
238/// the decision on the limits as part of the merge operation.
239class R__CLING_PTRCHECK(off) BufferedFillHelper : public RActionImpl<BufferedFillHelper> {
240 // this sets a total initial size of 16 MB for the buffers (can increase)
241 static constexpr unsigned int fgTotalBufSize = 2097152;
242 using BufEl_t = double;
243 using Buf_t = std::vector<BufEl_t>;
244
245 std::vector<Buf_t> fBuffers;
246 std::vector<Buf_t> fWBuffers;
247 std::shared_ptr<Hist_t> fResultHist;
248 unsigned int fNSlots;
249 unsigned int fBufSize;
250 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
251 Results<std::unique_ptr<Hist_t>> fPartialHists;
252 Buf_t fMin;
253 Buf_t fMax;
254
255 void UpdateMinMax(unsigned int slot, double v);
256
257public:
258 BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
259 BufferedFillHelper(BufferedFillHelper &&) = default;
260 BufferedFillHelper(const BufferedFillHelper &) = delete;
261 void InitTask(TTreeReader *, unsigned int) {}
262 void Exec(unsigned int slot, double v);
263 void Exec(unsigned int slot, double v, double w);
264
265 template <typename T, std::enable_if_t<IsDataContainer<T>::value || std::is_same<T, std::string>::value, int> = 0>
266 void Exec(unsigned int slot, const T &vs)
267 {
268 auto &thisBuf = fBuffers[slot];
269 // range-based for results in warnings on some compilers due to vector<bool>'s custom reference type
270 for (auto v = vs.begin(); v != vs.end(); ++v) {
271 UpdateMinMax(slot, *v);
272 thisBuf.emplace_back(*v); // TODO: Can be optimised in case T == BufEl_t
273 }
274 }
275
276 template <typename T, typename W, std::enable_if_t<IsDataContainer<T>::value && IsDataContainer<W>::value, int> = 0>
277 void Exec(unsigned int slot, const T &vs, const W &ws)
278 {
279 auto &thisBuf = fBuffers[slot];
280
281 for (auto &v : vs) {
282 UpdateMinMax(slot, v);
283 thisBuf.emplace_back(v);
284 }
285
286 auto &thisWBuf = fWBuffers[slot];
287 for (auto &w : ws) {
288 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
289 }
290 }
291
292 template <typename T, typename W, std::enable_if_t<IsDataContainer<T>::value && !IsDataContainer<W>::value, int> = 0>
293 void Exec(unsigned int slot, const T &vs, const W w)
294 {
295 auto &thisBuf = fBuffers[slot];
296 for (auto &v : vs) {
297 UpdateMinMax(slot, v);
298 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
299 }
300
301 auto &thisWBuf = fWBuffers[slot];
302 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
303 }
304
305 template <typename T, typename W, std::enable_if_t<IsDataContainer<W>::value && !IsDataContainer<T>::value, int> = 0>
306 void Exec(unsigned int slot, const T v, const W &ws)
307 {
308 UpdateMinMax(slot, v);
309 auto &thisBuf = fBuffers[slot];
310 thisBuf.insert(thisBuf.end(), ws.size(), v);
311
312 auto &thisWBuf = fWBuffers[slot];
313 thisWBuf.insert(thisWBuf.end(), ws.begin(), ws.end());
314 }
315
316 Hist_t &PartialUpdate(unsigned int);
317
318 void Initialize() { /* noop */}
319
320 void Finalize();
321
322 // Helper functions for RMergeableValue
323 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
324 {
325 return std::make_unique<RMergeableFill<Hist_t>>(*fResultHist);
326 }
327
328 std::string GetActionName()
329 {
330 return std::string(fResultHist->IsA()->GetName()) + "\\n" + std::string(fResultHist->GetName());
331 }
332
333 BufferedFillHelper MakeNew(void *newResult)
334 {
335 auto &result = *static_cast<std::shared_ptr<Hist_t> *>(newResult);
336 result->Reset();
337 result->SetDirectory(nullptr);
338 return BufferedFillHelper(result, fNSlots);
339 }
340};
341
342extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &);
343extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &);
344extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &);
345extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &);
346extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
347extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
348extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
349extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
350extern template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
351extern template void
352BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
353
354/// The generic Fill helper: it calls Fill on per-thread objects and then Merge to produce a final result.
355/// For one-dimensional histograms, if no axes are specified, RDataFrame uses BufferedFillHelper instead.
356template <typename HIST = Hist_t>
357class R__CLING_PTRCHECK(off) FillHelper : public RActionImpl<FillHelper<HIST>> {
358 std::vector<HIST *> fObjects;
359
360 template <typename H = HIST, typename = decltype(std::declval<H>().Reset())>
361 void ResetIfPossible(H *h)
362 {
363 h->Reset();
364 }
365
366 void ResetIfPossible(TStatistic *h) { *h = TStatistic(); }
367
368 // cannot safely re-initialize variations of the result, hence error out
369 void ResetIfPossible(...)
370 {
371 throw std::runtime_error(
372 "A systematic variation was requested for a custom Fill action, but the type of the object to be filled does "
373 "not implement a Reset method, so we cannot safely re-initialize variations of the result. Aborting.");
374 }
375
376 void UnsetDirectoryIfPossible(TH1 *h) {
377 h->SetDirectory(nullptr);
378 }
379
380 void UnsetDirectoryIfPossible(...) {}
381
382 // Merge overload for types with Merge(TCollection*), like TH1s
383 template <typename H, typename = std::enable_if_t<std::is_base_of<TObject, H>::value, int>>
384 auto Merge(std::vector<H *> &objs, int /*toincreaseoverloadpriority*/)
385 -> decltype(objs[0]->Merge((TCollection *)nullptr), void())
386 {
387 TList l;
388 for (auto it = ++objs.begin(); it != objs.end(); ++it)
389 l.Add(*it);
390 objs[0]->Merge(&l);
391 }
392
393 // Merge overload for types with Merge(const std::vector&)
394 template <typename H>
395 auto Merge(std::vector<H *> &objs, double /*toloweroverloadpriority*/)
396 -> decltype(objs[0]->Merge(std::vector<HIST *>{}), void())
397 {
398 objs[0]->Merge({++objs.begin(), objs.end()});
399 }
400
401 // Merge overload to error out in case no valid HIST::Merge method was detected
402 template <typename T>
403 void Merge(T, ...)
404 {
405 static_assert(sizeof(T) < 0,
406 "The type passed to Fill does not provide a Merge(TCollection*) or Merge(const std::vector&) method.");
407 }
408
409 // class which wraps a pointer and implements a no-op increment operator
410 template <typename T>
411 class ScalarConstIterator {
412 const T *obj_;
413
414 public:
415 ScalarConstIterator(const T *obj) : obj_(obj) {}
416 const T &operator*() const { return *obj_; }
417 ScalarConstIterator<T> &operator++() { return *this; }
418 };
419
420 // helper functions which provide one implementation for scalar types and another for containers
421 // TODO these could probably all be replaced by inlined lambdas and/or constexpr if statements
422 // in c++17 or later
423
424 // return unchanged value for scalar
425 template <typename T, std::enable_if_t<!IsDataContainer<T>::value, int> = 0>
426 ScalarConstIterator<T> MakeBegin(const T &val)
427 {
428 return ScalarConstIterator<T>(&val);
429 }
430
431 // return iterator to beginning of container
432 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
433 auto MakeBegin(const T &val)
434 {
435 return std::begin(val);
436 }
437
438 // return 1 for scalars
439 template <typename T, std::enable_if_t<!IsDataContainer<T>::value, int> = 0>
440 std::size_t GetSize(const T &)
441 {
442 return 1;
443 }
444
445 // return container size
446 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
447 std::size_t GetSize(const T &val)
448 {
449#if __cplusplus >= 201703L
450 return std::size(val);
451#else
452 return val.size();
453#endif
454 }
455
456 template <std::size_t ColIdx, typename End_t, typename... Its>
457 void ExecLoop(unsigned int slot, End_t end, Its... its)
458 {
459 auto *thisSlotH = fObjects[slot];
460 // loop increments all of the iterators while leaving scalars unmodified
461 // TODO this could be simplified with fold expressions or std::apply in C++17
462 auto nop = [](auto &&...) {};
463 for (; GetNthElement<ColIdx>(its...) != end; nop(++its...)) {
464 thisSlotH->Fill(*its...);
465 }
466 }
467
468public:
469 FillHelper(FillHelper &&) = default;
470 FillHelper(const FillHelper &) = delete;
471
472 FillHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
473 {
474 fObjects[0] = h.get();
475 // Initialize all other slots
476 for (unsigned int i = 1; i < nSlots; ++i) {
477 fObjects[i] = new HIST(*fObjects[0]);
478 UnsetDirectoryIfPossible(fObjects[i]);
479 }
480 }
481
482 void InitTask(TTreeReader *, unsigned int) {}
483
484 // no container arguments
485 template <typename... ValTypes, std::enable_if_t<!Disjunction<IsDataContainer<ValTypes>...>::value, int> = 0>
486 auto Exec(unsigned int slot, const ValTypes &...x) -> decltype(fObjects[slot]->Fill(x...), void())
487 {
488 fObjects[slot]->Fill(x...);
489 }
490
491 // at least one container argument
492 template <typename... Xs, std::enable_if_t<Disjunction<IsDataContainer<Xs>...>::value, int> = 0>
493 auto Exec(unsigned int slot, const Xs &...xs) -> decltype(fObjects[slot]->Fill(*MakeBegin(xs)...), void())
494 {
495 // array of bools keeping track of which inputs are containers
496 constexpr std::array<bool, sizeof...(Xs)> isContainer{IsDataContainer<Xs>::value...};
497
498 // index of the first container input
499 constexpr std::size_t colidx = FindIdxTrue(isContainer);
500 // if this happens, there is a bug in the implementation
501 static_assert(colidx < sizeof...(Xs), "Error: index of collection-type argument not found.");
502
503 // get the end iterator to the first container
504 auto const xrefend = std::end(GetNthElement<colidx>(xs...));
505
506 // array of container sizes (1 for scalars)
507 std::array<std::size_t, sizeof...(xs)> sizes = {{GetSize(xs)...}};
508
509 for (std::size_t i = 0; i < sizeof...(xs); ++i) {
510 if (isContainer[i] && sizes[i] != sizes[colidx]) {
511 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
512 }
513 }
514
515 ExecLoop<colidx>(slot, xrefend, MakeBegin(xs)...);
516 }
517
518 template <typename T = HIST>
519 void Exec(...)
520 {
521 static_assert(sizeof(T) < 0,
522 "When filling an object with RDataFrame (e.g. via a Fill action) the number or types of the "
523 "columns passed did not match the signature of the object's `Fill` method.");
524 }
525
526 void Initialize() { /* noop */}
527
528 void Finalize()
529 {
530 if (fObjects.size() == 1)
531 return;
532
533 Merge(fObjects, /*toselectcorrectoverload=*/0);
534
535 // delete the copies we created for the slots other than the first
536 for (auto it = ++fObjects.begin(); it != fObjects.end(); ++it)
537 delete *it;
538 }
539
540 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
541
542 // Helper functions for RMergeableValue
543 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
544 {
545 return std::make_unique<RMergeableFill<HIST>>(*fObjects[0]);
546 }
547
548 // if the fObjects vector type is derived from TObject, return the name of the object
549 template <typename T = HIST, std::enable_if_t<std::is_base_of<TObject, T>::value, int> = 0>
550 std::string GetActionName()
551 {
552 return std::string(fObjects[0]->IsA()->GetName()) + "\\n" + std::string(fObjects[0]->GetName());
553 }
554
555 // if fObjects is not derived from TObject, indicate it is some other object
556 template <typename T = HIST, std::enable_if_t<!std::is_base_of<TObject, T>::value, int> = 0>
557 std::string GetActionName()
558 {
559 return "Fill custom object";
560 }
561
562 template <typename H = HIST>
563 FillHelper MakeNew(void *newResult)
564 {
565 auto &result = *static_cast<std::shared_ptr<H> *>(newResult);
566 ResetIfPossible(result.get());
567 UnsetDirectoryIfPossible(result.get());
568 return FillHelper(result, fObjects.size());
569 }
570};
571
572class R__CLING_PTRCHECK(off) FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
573public:
574 using Result_t = ::TGraph;
575
576private:
577 std::vector<::TGraph *> fGraphs;
578
579public:
580 FillTGraphHelper(FillTGraphHelper &&) = default;
581 FillTGraphHelper(const FillTGraphHelper &) = delete;
582
583 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
584 {
585 fGraphs[0] = g.get();
586 // Initialize all other slots
587 for (unsigned int i = 1; i < nSlots; ++i) {
588 fGraphs[i] = new TGraph(*fGraphs[0]);
589 }
590 }
591
592 void Initialize() {}
593 void InitTask(TTreeReader *, unsigned int) {}
594
595 // case: both types are container types
596 template <typename X0, typename X1,
597 std::enable_if_t<IsDataContainer<X0>::value && IsDataContainer<X1>::value, int> = 0>
598 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
599 {
600 if (x0s.size() != x1s.size()) {
601 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
602 }
603 auto *thisSlotG = fGraphs[slot];
604 auto x0sIt = std::begin(x0s);
605 const auto x0sEnd = std::end(x0s);
606 auto x1sIt = std::begin(x1s);
607 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
608 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
609 }
610 }
611
612 // case: both types are non-container types, e.g. scalars
613 template <typename X0, typename X1,
614 std::enable_if_t<!IsDataContainer<X0>::value && !IsDataContainer<X1>::value, int> = 0>
615 void Exec(unsigned int slot, X0 x0, X1 x1)
616 {
617 auto thisSlotG = fGraphs[slot];
618 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
619 }
620
621 // case: types are combination of containers and non-containers
622 // this is not supported, error out
623 template <typename X0, typename X1, typename... ExtraArgsToLowerPriority>
624 void Exec(unsigned int, X0, X1, ExtraArgsToLowerPriority...)
625 {
626 throw std::runtime_error("Graph was applied to a mix of scalar values and collections. This is not supported.");
627 }
628
629 void Finalize()
630 {
631 const auto nSlots = fGraphs.size();
632 auto resGraph = fGraphs[0];
633 TList l;
634 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
635 for (unsigned int slot = 1; slot < nSlots; ++slot) {
636 l.Add(fGraphs[slot]);
637 }
638 resGraph->Merge(&l);
639 }
640
641 // Helper functions for RMergeableValue
642 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
643 {
644 return std::make_unique<RMergeableFill<Result_t>>(*fGraphs[0]);
645 }
646
647 std::string GetActionName() { return "Graph"; }
648
649 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
650
651 FillTGraphHelper MakeNew(void *newResult)
652 {
653 auto &result = *static_cast<std::shared_ptr<TGraph> *>(newResult);
654 result->Set(0);
655 return FillTGraphHelper(result, fGraphs.size());
656 }
657};
658
659class R__CLING_PTRCHECK(off) FillTGraphAsymmErrorsHelper
660 : public ROOT::Detail::RDF::RActionImpl<FillTGraphAsymmErrorsHelper> {
661public:
662 using Result_t = ::TGraphAsymmErrors;
663
664private:
665 std::vector<::TGraphAsymmErrors *> fGraphAsymmErrors;
666
667public:
668 FillTGraphAsymmErrorsHelper(FillTGraphAsymmErrorsHelper &&) = default;
669 FillTGraphAsymmErrorsHelper(const FillTGraphAsymmErrorsHelper &) = delete;
670
671 FillTGraphAsymmErrorsHelper(const std::shared_ptr<::TGraphAsymmErrors> &g, const unsigned int nSlots)
672 : fGraphAsymmErrors(nSlots, nullptr)
673 {
674 fGraphAsymmErrors[0] = g.get();
675 // Initialize all other slots
676 for (unsigned int i = 1; i < nSlots; ++i) {
677 fGraphAsymmErrors[i] = new TGraphAsymmErrors(*fGraphAsymmErrors[0]);
678 }
679 }
680
681 void Initialize() {}
682 void InitTask(TTreeReader *, unsigned int) {}
683
684 // case: all types are container types
685 template <
686 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
687 std::enable_if_t<IsDataContainer<X>::value && IsDataContainer<Y>::value && IsDataContainer<EXL>::value &&
688 IsDataContainer<EXH>::value && IsDataContainer<EYL>::value && IsDataContainer<EYH>::value,
689 int> = 0>
690 void
691 Exec(unsigned int slot, const X &xs, const Y &ys, const EXL &exls, const EXH &exhs, const EYL &eyls, const EYH &eyhs)
692 {
693 if ((xs.size() != ys.size()) || (xs.size() != exls.size()) || (xs.size() != exhs.size()) ||
694 (xs.size() != eyls.size()) || (xs.size() != eyhs.size())) {
695 throw std::runtime_error("Cannot fill GraphAsymmErrors with values in containers of different sizes.");
696 }
697 auto *thisSlotG = fGraphAsymmErrors[slot];
698 auto xsIt = std::begin(xs);
699 auto ysIt = std::begin(ys);
700 auto exlsIt = std::begin(exls);
701 auto exhsIt = std::begin(exhs);
702 auto eylsIt = std::begin(eyls);
703 auto eyhsIt = std::begin(eyhs);
704 while (xsIt != std::end(xs)) {
705 const auto n = thisSlotG->GetN(); // must use the same `n` for SetPoint and SetPointError
706 thisSlotG->SetPoint(n, *xsIt++, *ysIt++);
707 thisSlotG->SetPointError(n, *exlsIt++, *exhsIt++, *eylsIt++, *eyhsIt++);
708 }
709 }
710
711 // case: all types are non-container types, e.g. scalars
712 template <
713 typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
714 std::enable_if_t<!IsDataContainer<X>::value && !IsDataContainer<Y>::value && !IsDataContainer<EXL>::value &&
715 !IsDataContainer<EXH>::value && !IsDataContainer<EYL>::value && !IsDataContainer<EYH>::value,
716 int> = 0>
717 void Exec(unsigned int slot, X x, Y y, EXL exl, EXH exh, EYL eyl, EYH eyh)
718 {
719 auto thisSlotG = fGraphAsymmErrors[slot];
720 const auto n = thisSlotG->GetN();
721 thisSlotG->SetPoint(n, x, y);
722 thisSlotG->SetPointError(n, exl, exh, eyl, eyh);
723 }
724
725 // case: types are combination of containers and non-containers
726 // this is not supported, error out
727 template <typename X, typename Y, typename EXL, typename EXH, typename EYL, typename EYH,
728 typename... ExtraArgsToLowerPriority>
729 void Exec(unsigned int, X, Y, EXL, EXH, EYL, EYH, ExtraArgsToLowerPriority...)
730 {
731 throw std::runtime_error(
732 "GraphAsymmErrors was applied to a mix of scalar values and collections. This is not supported.");
733 }
734
735 void Finalize()
736 {
737 const auto nSlots = fGraphAsymmErrors.size();
738 auto resGraphAsymmErrors = fGraphAsymmErrors[0];
739 TList l;
740 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
741 for (unsigned int slot = 1; slot < nSlots; ++slot) {
742 l.Add(fGraphAsymmErrors[slot]);
743 }
744 resGraphAsymmErrors->Merge(&l);
745 }
746
747 // Helper functions for RMergeableValue
748 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
749 {
750 return std::make_unique<RMergeableFill<Result_t>>(*fGraphAsymmErrors[0]);
751 }
752
753 std::string GetActionName() { return "GraphAsymmErrors"; }
754
755 Result_t &PartialUpdate(unsigned int slot) { return *fGraphAsymmErrors[slot]; }
756
757 FillTGraphAsymmErrorsHelper MakeNew(void *newResult)
758 {
759 auto &result = *static_cast<std::shared_ptr<TGraphAsymmErrors> *>(newResult);
760 result->Set(0);
761 return FillTGraphAsymmErrorsHelper(result, fGraphAsymmErrors.size());
762 }
763};
764
765// In case of the take helper we have 4 cases:
766// 1. The column is not an RVec, the collection is not a vector
767// 2. The column is not an RVec, the collection is a vector
768// 3. The column is an RVec, the collection is not a vector
769// 4. The column is an RVec, the collection is a vector
770
/// Append `v` to the collection `c`, constructing the new element in place.
/// `v` is perfectly forwarded: lvalues are copied as before, while rvalues are now moved into the collection
/// instead of being copied, which also makes move-only element types usable.
template <typename V, typename COLL>
void FillColl(V&& v, COLL& c) {
   c.emplace_back(std::forward<V>(v));
}

// Use push_back for bool since some compilers do not support emplace_back.
template <typename COLL>
void FillColl(bool v, COLL& c) {
   c.push_back(v);
}
781
782// Case 1.: The column is not an RVec, the collection is not a vector
783// No optimisations, no transformations: just copies.
784template <typename RealT_t, typename T, typename COLL>
785class R__CLING_PTRCHECK(off) TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
786 Results<std::shared_ptr<COLL>> fColls;
787
788public:
789 using ColumnTypes_t = TypeList<T>;
790 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
791 {
792 fColls.emplace_back(resultColl);
793 for (unsigned int i = 1; i < nSlots; ++i)
794 fColls.emplace_back(std::make_shared<COLL>());
795 }
796 TakeHelper(TakeHelper &&);
797 TakeHelper(const TakeHelper &) = delete;
798
799 void InitTask(TTreeReader *, unsigned int) {}
800
801 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
802
803 void Initialize() { /* noop */}
804
805 void Finalize()
806 {
807 auto rColl = fColls[0];
808 for (unsigned int i = 1; i < fColls.size(); ++i) {
809 const auto &coll = fColls[i];
810 const auto end = coll->end();
811 // Use an explicit loop here to prevent compiler warnings introduced by
812 // clang's range-based loop analysis and vector<bool> references.
813 for (auto j = coll->begin(); j != end; j++) {
814 FillColl(*j, *rColl);
815 }
816 }
817 }
818
819 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
820
821 std::string GetActionName() { return "Take"; }
822
823 TakeHelper MakeNew(void *newResult)
824 {
825 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
826 result->clear();
827 return TakeHelper(result, fColls.size());
828 }
829};
830
831// Case 2.: The column is not an RVec, the collection is a vector
832// Optimisations, no transformations: just copies.
833template <typename RealT_t, typename T>
834class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, T, std::vector<T>>
835 : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
836 Results<std::shared_ptr<std::vector<T>>> fColls;
837
838public:
839 using ColumnTypes_t = TypeList<T>;
840 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
841 {
842 fColls.emplace_back(resultColl);
843 for (unsigned int i = 1; i < nSlots; ++i) {
844 auto v = std::make_shared<std::vector<T>>();
845 v->reserve(1024);
846 fColls.emplace_back(v);
847 }
848 }
849 TakeHelper(TakeHelper &&);
850 TakeHelper(const TakeHelper &) = delete;
851
852 void InitTask(TTreeReader *, unsigned int) {}
853
854 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
855
856 void Initialize() { /* noop */}
857
858 // This is optimised to treat vectors
859 void Finalize()
860 {
861 ULong64_t totSize = 0;
862 for (auto &coll : fColls)
863 totSize += coll->size();
864 auto rColl = fColls[0];
865 rColl->reserve(totSize);
866 for (unsigned int i = 1; i < fColls.size(); ++i) {
867 auto &coll = fColls[i];
868 rColl->insert(rColl->end(), coll->begin(), coll->end());
869 }
870 }
871
872 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
873
874 std::string GetActionName() { return "Take"; }
875
876 TakeHelper MakeNew(void *newResult)
877 {
878 auto &result = *static_cast<std::shared_ptr<std::vector<T>> *>(newResult);
879 result->clear();
880 return TakeHelper(result, fColls.size());
881 }
882};
883
884// Case 3.: The column is an RVec, the collection is not a vector
885// No optimisations, transformations from RVecs to vectors
886template <typename RealT_t, typename COLL>
887class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, COLL>
888 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
889 Results<std::shared_ptr<COLL>> fColls;
890
891public:
892 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
893 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
894 {
895 fColls.emplace_back(resultColl);
896 for (unsigned int i = 1; i < nSlots; ++i)
897 fColls.emplace_back(std::make_shared<COLL>());
898 }
899 TakeHelper(TakeHelper &&);
900 TakeHelper(const TakeHelper &) = delete;
901
902 void InitTask(TTreeReader *, unsigned int) {}
903
904 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
905
906 void Initialize() { /* noop */}
907
908 void Finalize()
909 {
910 auto rColl = fColls[0];
911 for (unsigned int i = 1; i < fColls.size(); ++i) {
912 auto &coll = fColls[i];
913 for (auto &v : *coll) {
914 rColl->emplace_back(v);
915 }
916 }
917 }
918
919 std::string GetActionName() { return "Take"; }
920
921 TakeHelper MakeNew(void *newResult)
922 {
923 auto &result = *static_cast<std::shared_ptr<COLL> *>(newResult);
924 result->clear();
925 return TakeHelper(result, fColls.size());
926 }
927};
928
929// Case 4.: The column is an RVec, the collection is a vector
930// Optimisations, transformations from RVecs to vectors
931template <typename RealT_t>
932class R__CLING_PTRCHECK(off) TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
933 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
934
935 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
936
937public:
938 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
939 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
940 {
941 fColls.emplace_back(resultColl);
942 for (unsigned int i = 1; i < nSlots; ++i) {
943 auto v = std::make_shared<std::vector<RealT_t>>();
944 v->reserve(1024);
945 fColls.emplace_back(v);
946 }
947 }
948 TakeHelper(TakeHelper &&);
949 TakeHelper(const TakeHelper &) = delete;
950
951 void InitTask(TTreeReader *, unsigned int) {}
952
953 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
954
955 void Initialize() { /* noop */}
956
957 // This is optimised to treat vectors
958 void Finalize()
959 {
960 ULong64_t totSize = 0;
961 for (auto &coll : fColls)
962 totSize += coll->size();
963 auto rColl = fColls[0];
964 rColl->reserve(totSize);
965 for (unsigned int i = 1; i < fColls.size(); ++i) {
966 auto &coll = fColls[i];
967 rColl->insert(rColl->end(), coll->begin(), coll->end());
968 }
969 }
970
971 std::string GetActionName() { return "Take"; }
972
973 TakeHelper MakeNew(void *newResult)
974 {
975 auto &result = *static_cast<typename decltype(fColls)::value_type *>(newResult);
976 result->clear();
977 return TakeHelper(result, fColls.size());
978 }
979};
980
981// Extern templates for TakeHelper
982// NOTE: The move-constructor of specializations declared as extern templates
983// must be defined out of line, otherwise cling fails to find its symbol.
984template <typename RealT_t, typename T, typename COLL>
985TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
986template <typename RealT_t, typename T>
987TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
988template <typename RealT_t, typename COLL>
989TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
990template <typename RealT_t>
991TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
992
993// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
994#if __GNUC__ > 5
995extern template class TakeHelper<bool, bool, std::vector<bool>>;
996extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
997extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
998extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
999extern template class TakeHelper<int, int, std::vector<int>>;
1000extern template class TakeHelper<long, long, std::vector<long>>;
1001extern template class TakeHelper<long long, long long, std::vector<long long>>;
1002extern template class TakeHelper<float, float, std::vector<float>>;
1003extern template class TakeHelper<double, double, std::vector<double>>;
1004#endif
1005
1006template <typename ResultType>
1007class R__CLING_PTRCHECK(off) MinHelper : public RActionImpl<MinHelper<ResultType>> {
1008 std::shared_ptr<ResultType> fResultMin;
1009 Results<ResultType> fMins;
1010
1011public:
1012 MinHelper(MinHelper &&) = default;
1013 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
1014 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
1015 {
1016 }
1017
1018 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
1019
1020 void InitTask(TTreeReader *, unsigned int) {}
1021
1022 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1023 void Exec(unsigned int slot, const T &vs)
1024 {
1025 for (auto &&v : vs)
1026 fMins[slot] = std::min(static_cast<ResultType>(v), fMins[slot]);
1027 }
1028
1029 void Initialize() { /* noop */}
1030
1031 void Finalize()
1032 {
1033 *fResultMin = std::numeric_limits<ResultType>::max();
1034 for (auto &m : fMins)
1035 *fResultMin = std::min(m, *fResultMin);
1036 }
1037
1038 // Helper functions for RMergeableValue
1039 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1040 {
1041 return std::make_unique<RMergeableMin<ResultType>>(*fResultMin);
1042 }
1043
1044 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
1045
1046 std::string GetActionName() { return "Min"; }
1047
1048 MinHelper MakeNew(void *newResult)
1049 {
1050 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1051 return MinHelper(result, fMins.size());
1052 }
1053};
1054
1055// TODO
1056// extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
1057// extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
1058// extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
1059// extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
1060// extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1061
1062template <typename ResultType>
1063class R__CLING_PTRCHECK(off) MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
1064 std::shared_ptr<ResultType> fResultMax;
1065 Results<ResultType> fMaxs;
1066
1067public:
1068 MaxHelper(MaxHelper &&) = default;
1069 MaxHelper(const MaxHelper &) = delete;
1070 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
1071 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
1072 {
1073 }
1074
1075 void InitTask(TTreeReader *, unsigned int) {}
1076 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
1077
1078 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1079 void Exec(unsigned int slot, const T &vs)
1080 {
1081 for (auto &&v : vs)
1082 fMaxs[slot] = std::max(static_cast<ResultType>(v), fMaxs[slot]);
1083 }
1084
1085 void Initialize() { /* noop */}
1086
1087 void Finalize()
1088 {
1089 *fResultMax = std::numeric_limits<ResultType>::lowest();
1090 for (auto &m : fMaxs) {
1091 *fResultMax = std::max(m, *fResultMax);
1092 }
1093 }
1094
1095 // Helper functions for RMergeableValue
1096 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1097 {
1098 return std::make_unique<RMergeableMax<ResultType>>(*fResultMax);
1099 }
1100
1101 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
1102
1103 std::string GetActionName() { return "Max"; }
1104
1105 MaxHelper MakeNew(void *newResult)
1106 {
1107 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1108 return MaxHelper(result, fMaxs.size());
1109 }
1110};
1111
1112// TODO
1113// extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
1114// extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
1115// extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
1116// extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
1117// extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1118
1119template <typename ResultType>
1120class R__CLING_PTRCHECK(off) SumHelper : public RActionImpl<SumHelper<ResultType>> {
1121 std::shared_ptr<ResultType> fResultSum;
1122 Results<ResultType> fSums;
1123 Results<ResultType> fCompensations;
1124
1125 /// Evaluate neutral element for this type and the sum operation.
1126 /// This is assumed to be any_value - any_value if operator- is defined
1127 /// for the type, otherwise a default-constructed ResultType{} is used.
1128 template <typename T = ResultType>
1129 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
1130 {
1131 return v - v;
1132 }
1133
1134 template <typename T = ResultType, typename Dummy = int>
1135 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
1136 {
1137 return ResultType{};
1138 }
1139
1140public:
1141 SumHelper(SumHelper &&) = default;
1142 SumHelper(const SumHelper &) = delete;
1143 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
1144 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1)),
1145 fCompensations(nSlots, NeutralElement(*sumVPtr, -1))
1146 {
1147 }
1148 void InitTask(TTreeReader *, unsigned int) {}
1149
1150 void Exec(unsigned int slot, ResultType x)
1151 {
1152 // Kahan Sum:
1153 ResultType y = x - fCompensations[slot];
1154 ResultType t = fSums[slot] + y;
1155 fCompensations[slot] = (t - fSums[slot]) - y;
1156 fSums[slot] = t;
1157 }
1158
1159 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1160 void Exec(unsigned int slot, const T &vs)
1161 {
1162 for (auto &&v : vs) {
1163 Exec(slot, v);
1164 }
1165 }
1166
1167 void Initialize() { /* noop */}
1168
1169 void Finalize()
1170 {
1171 ResultType sum(NeutralElement(ResultType{}, -1));
1172 ResultType compensation(NeutralElement(ResultType{}, -1));
1173 ResultType y(NeutralElement(ResultType{}, -1));
1174 ResultType t(NeutralElement(ResultType{}, -1));
1175 for (auto &m : fSums) {
1176 // Kahan Sum:
1177 y = m - compensation;
1178 t = sum + y;
1179 compensation = (t - sum) - y;
1180 sum = t;
1181 }
1182 *fResultSum += sum;
1183 }
1184
1185 // Helper functions for RMergeableValue
1186 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1187 {
1188 return std::make_unique<RMergeableSum<ResultType>>(*fResultSum);
1189 }
1190
1191 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
1192
1193 std::string GetActionName() { return "Sum"; }
1194
1195 SumHelper MakeNew(void *newResult)
1196 {
1197 auto &result = *static_cast<std::shared_ptr<ResultType> *>(newResult);
1198 *result = NeutralElement(*result, -1);
1199 return SumHelper(result, fSums.size());
1200 }
1201};
1202
1203class R__CLING_PTRCHECK(off) MeanHelper : public RActionImpl<MeanHelper> {
1204 std::shared_ptr<double> fResultMean;
1205 std::vector<ULong64_t> fCounts;
1206 std::vector<double> fSums;
1207 std::vector<double> fPartialMeans;
1208 std::vector<double> fCompensations;
1209
1210public:
1211 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1212 MeanHelper(MeanHelper &&) = default;
1213 MeanHelper(const MeanHelper &) = delete;
1214 void InitTask(TTreeReader *, unsigned int) {}
1215 void Exec(unsigned int slot, double v);
1216
1217 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1218 void Exec(unsigned int slot, const T &vs)
1219 {
1220 for (auto &&v : vs) {
1221
1222 fCounts[slot]++;
1223 // Kahan Sum:
1224 double y = v - fCompensations[slot];
1225 double t = fSums[slot] + y;
1226 fCompensations[slot] = (t - fSums[slot]) - y;
1227 fSums[slot] = t;
1228 }
1229 }
1230
1231 void Initialize() { /* noop */}
1232
1233 void Finalize();
1234
1235 // Helper functions for RMergeableValue
1236 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1237 {
1238 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1239 return std::make_unique<RMergeableMean>(*fResultMean, counts);
1240 }
1241
1242 double &PartialUpdate(unsigned int slot);
1243
1244 std::string GetActionName() { return "Mean"; }
1245
1246 MeanHelper MakeNew(void *newResult)
1247 {
1248 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1249 return MeanHelper(result, fSums.size());
1250 }
1251};
1252
1253extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
1254extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
1255extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
1256extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
1257extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1258
1259class R__CLING_PTRCHECK(off) StdDevHelper : public RActionImpl<StdDevHelper> {
1260 // Number of subsets of data
1261 unsigned int fNSlots;
1262 std::shared_ptr<double> fResultStdDev;
1263 // Number of element for each slot
1264 std::vector<ULong64_t> fCounts;
1265 // Mean of each slot
1266 std::vector<double> fMeans;
1267 // Squared distance from the mean
1268 std::vector<double> fDistancesfromMean;
1269
1270public:
1271 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
1272 StdDevHelper(StdDevHelper &&) = default;
1273 StdDevHelper(const StdDevHelper &) = delete;
1274 void InitTask(TTreeReader *, unsigned int) {}
1275 void Exec(unsigned int slot, double v);
1276
1277 template <typename T, std::enable_if_t<IsDataContainer<T>::value, int> = 0>
1278 void Exec(unsigned int slot, const T &vs)
1279 {
1280 for (auto &&v : vs) {
1281 Exec(slot, v);
1282 }
1283 }
1284
1285 void Initialize() { /* noop */}
1286
1287 void Finalize();
1288
1289 // Helper functions for RMergeableValue
1290 std::unique_ptr<RMergeableValueBase> GetMergeableValue() const final
1291 {
1292 const ULong64_t counts = std::accumulate(fCounts.begin(), fCounts.end(), 0ull);
1293 const Double_t mean =
1294 std::inner_product(fMeans.begin(), fMeans.end(), fCounts.begin(), 0.) / static_cast<Double_t>(counts);
1295 return std::make_unique<RMergeableStdDev>(*fResultStdDev, counts, mean);
1296 }
1297
1298 std::string GetActionName() { return "StdDev"; }
1299
1300 StdDevHelper MakeNew(void *newResult)
1301 {
1302 auto &result = *static_cast<std::shared_ptr<double> *>(newResult);
1303 return StdDevHelper(result, fCounts.size());
1304 }
1305};
1306
1307extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
1308extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
1309extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
1310extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
1311extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
1312
1313template <typename PrevNodeType>
1314class R__CLING_PTRCHECK(off) DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
1315private:
1317 std::shared_ptr<Display_t> fDisplayerHelper;
1318 std::shared_ptr<PrevNodeType> fPrevNode;
1319 size_t fEntriesToProcess;
1320
1321public:
1322 DisplayHelper(size_t nRows, const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
1323 : fDisplayerHelper(d), fPrevNode(prevNode), fEntriesToProcess(nRows)
1324 {
1325 }
1326 DisplayHelper(DisplayHelper &&) = default;
1327 DisplayHelper(const DisplayHelper &) = delete;
1328 void InitTask(TTreeReader *, unsigned int) {}
1329
1330 template <typename... Columns>
1331 void Exec(unsigned int, Columns &... columns)
1332 {
1333 if (fEntriesToProcess == 0)
1334 return;
1335
1336 fDisplayerHelper->AddRow(columns...);
1337 --fEntriesToProcess;
1338
1339 if (fEntriesToProcess == 0) {
1340 // No more entries to process. Send a one-time signal that this node
1341 // of the graph is done. It is important that the 'StopProcessing'
1342 // method is only called once from this helper, otherwise it would seem
1343 // like more than one operation has completed its work.
1344 fPrevNode->StopProcessing();
1345 }
1346 }
1347
1348 void Initialize() {}
1349
1350 void Finalize() {}
1351
1352 std::string GetActionName() { return "Display"; }
1353};
1354
1355template <typename T>
1356void *GetData(ROOT::VecOps::RVec<T> &v)
1357{
1358 return v.data();
1359}
1360
/// Generic overload: for any argument type this returns a null pointer.
template <typename T>
void *GetData(T & /*v*/)
{
   void *const noBuffer = nullptr;
   return noBuffer;
}
1366
1367template <typename T>
1368void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &name,
1369 TBranch *&branch, void *&branchAddress, T *address, RBranchSet &outputBranches,
1370 bool /*isDefine*/)
1371{
1372 static TClassRef TBOClRef("TBranchObject");
1373
1374 TBranch *inputBranch = nullptr;
1375 if (inputTree) {
1376 inputBranch = inputTree->GetBranch(inName.c_str());
1377 if (!inputBranch) // try harder
1378 inputBranch = inputTree->FindBranch(inName.c_str());
1379 }
1380
1381 auto *outputBranch = outputBranches.Get(name);
1382 if (outputBranch) {
1383 // the output branch was already created, we just need to (re)set its address
1384 if (inputBranch && inputBranch->IsA() == TBOClRef) {
1385 outputBranch->SetAddress(reinterpret_cast<T **>(inputBranch->GetAddress()));
1386 } else if (outputBranch->IsA() != TBranch::Class()) {
1387 branchAddress = address;
1388 outputBranch->SetAddress(&branchAddress);
1389 } else {
1390 outputBranch->SetAddress(address);
1391 branchAddress = address;
1392 }
1393 return;
1394 }
1395
1396 if (inputBranch) {
1397 // Respect the original bufsize and splitlevel arguments
1398 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
1399 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
1400 // TObject branch, see https://bit.ly/2EjLMId ).
1401 const auto bufSize = inputBranch->GetBasketSize();
1402 const auto splitLevel = inputBranch->GetSplitLevel();
1403
1404 if (inputBranch->IsA() == TBOClRef) {
1405 // Need to pass a pointer to pointer
1406 outputBranch =
1407 outputTree.Branch(name.c_str(), reinterpret_cast<T **>(inputBranch->GetAddress()), bufSize, splitLevel);
1408 } else {
1409 outputBranch = outputTree.Branch(name.c_str(), address, bufSize, splitLevel);
1410 }
1411 } else {
1412 outputBranch = outputTree.Branch(name.c_str(), address);
1413 }
1414 outputBranches.Insert(name, outputBranch);
1415 // This is not an array branch, so we don't register the address of the output branch here
1416 branch = nullptr;
1417 branchAddress = nullptr;
1418}
1419
1420/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1421/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1422/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch
1423/// title
1424/// 2. RVecs coming from a custom column or the input file/data-source
1425/// 3. vectors coming from ROOT files that are being read as RVecs
1426/// 4. TClonesArray
1427///
1428/// In case of 1., we keep aside the pointer to the branch and the pointer to the input value (in `branch` and
1429/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
1430template <typename T>
1431void SetBranchesHelper(TTree *inputTree, TTree &outputTree, const std::string &inName, const std::string &outName,
1432 TBranch *&branch, void *&branchAddress, RVec<T> *ab, RBranchSet &outputBranches, bool isDefine)
1433{
1434 TBranch *inputBranch = nullptr;
1435 if (inputTree) {
1436 inputBranch = inputTree->GetBranch(inName.c_str());
1437 if (!inputBranch) // try harder
1438 inputBranch = inputTree->FindBranch(inName.c_str());
1439 }
1440 auto *outputBranch = outputBranches.Get(outName);
1441
1442 // if no backing input branch, we must write out an RVec
1443 bool mustWriteRVec = (inputBranch == nullptr || isDefine);
1444 // otherwise, if input branch is TClonesArray, must write out an RVec
1445 if (!mustWriteRVec && std::string_view(inputBranch->GetClassName()) == "TClonesArray") {
1446 mustWriteRVec = true;
1447 Warning("Snapshot",
1448 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1449 "be written out as a RVec instead of a TClonesArray. Specify that the type of the branch is "
1450 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.",
1451 inName.c_str());
1452 }
1453 // otherwise, if input branch is a std::vector or RVec, must write out an RVec
1454 if (!mustWriteRVec) {
1455 const auto STLKind = TClassEdit::IsSTLCont(inputBranch->GetClassName());
1456 if (STLKind == ROOT::ESTLType::kSTLvector || STLKind == ROOT::ESTLType::kROOTRVec)
1457 mustWriteRVec = true;
1458 }
1459
1460 if (mustWriteRVec) {
1461 // Treat:
1462 // 2. RVec coming from a custom column or a source
1463 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1464 // 4. TClonesArray written out as RVec<T>
1465 if (outputBranch) {
1466 // needs to be SetObject (not SetAddress) to mimic what happens when this TBranchElement is constructed
1467 outputBranch->SetObject(ab);
1468 } else {
1469 auto *b = outputTree.Branch(outName.c_str(), ab);
1470 outputBranches.Insert(outName, b);
1471 }
1472 return;
1473 }
1474
1475 // else this must be a C-array, aka case 1.
1476 auto dataPtr = ab->data();
1477
1478 if (outputBranch) {
1479 if (outputBranch->IsA() != TBranch::Class()) {
1480 branchAddress = dataPtr;
1481 outputBranch->SetAddress(&branchAddress);
1482 } else {
1483 outputBranch->SetAddress(dataPtr);
1484 }
1485 } else {
1486 // must construct the leaflist for the output branch and create the branch in the output tree
1487 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1488 const auto bname = leaf->GetName();
1489 auto *sizeLeaf = leaf->GetLeafCount();
1490 const auto sizeLeafName = sizeLeaf ? std::string(sizeLeaf->GetName()) : std::to_string(leaf->GetLenStatic());
1491
1492 if (sizeLeaf && !outputBranches.Get(sizeLeafName)) {
1493 // The output array branch `bname` has dynamic size stored in leaf `sizeLeafName`, but that leaf has not been
1494 // added to the output tree yet. However, the size leaf has to be available for the creation of the array
1495 // branch to be successful. So we create the size leaf here.
1496 const auto sizeTypeStr = TypeName2ROOTTypeName(sizeLeaf->GetTypeName());
1497 const auto sizeBufSize = sizeLeaf->GetBranch()->GetBasketSize();
1498 // The null branch address is a placeholder. It will be set when SetBranchesHelper is called for `sizeLeafName`
1499 auto *sizeBranch = outputTree.Branch(sizeLeafName.c_str(), (void *)nullptr,
1500 (sizeLeafName + '/' + sizeTypeStr).c_str(), sizeBufSize);
1501 outputBranches.Insert(sizeLeafName, sizeBranch);
1502 }
1503
1504 const auto btype = leaf->GetTypeName();
1505 const auto rootbtype = TypeName2ROOTTypeName(btype);
1506 if (rootbtype == ' ') {
1507 Warning("Snapshot",
1508 "RDataFrame::Snapshot: could not correctly construct a leaflist for C-style array in column %s. This "
1509 "column will not be written out.",
1510 bname);
1511 } else {
1512 const auto leaflist = std::string(bname) + "[" + sizeLeafName + "]/" + rootbtype;
1513 outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1514 outputBranch->SetTitle(inputBranch->GetTitle());
1515 outputBranches.Insert(outName, outputBranch);
1516 branch = outputBranch;
1517 branchAddress = ab->data();
1518 }
1519 }
1520}
1521
1522void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName);
1523
1524/// Helper object for a single-thread Snapshot action
1525template <typename... ColTypes>
1526class R__CLING_PTRCHECK(off) SnapshotHelper : public RActionImpl<SnapshotHelper<ColTypes...>> {
1527 std::string fFileName;
1528 std::string fDirName;
1529 std::string fTreeName;
1530 RSnapshotOptions fOptions;
1531 std::unique_ptr<TFile> fOutputFile;
1532 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1533 bool fBranchAddressesNeedReset{true};
1534 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1535 ColumnNames_t fOutputBranchNames;
1536 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1537 // TODO we might be able to unify fBranches, fBranchAddresses and fOutputBranches
1538 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1539 std::vector<void *> fBranchAddresses; // Addresses of objects associated to output branches
1540 RBranchSet fOutputBranches;
1541 std::vector<bool> fIsDefine;
1542
1543public:
1544 using ColumnTypes_t = TypeList<ColTypes...>;
1545 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1546 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options,
1547 std::vector<bool> &&isDefine)
1548 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1549 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1550 fBranchAddresses(vbnames.size(), nullptr), fIsDefine(std::move(isDefine))
1551 {
1552 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1553 }
1554
1555 SnapshotHelper(const SnapshotHelper &) = delete;
1556 SnapshotHelper(SnapshotHelper &&) = default;
1557 ~SnapshotHelper()
1558 {
1559 if (!fTreeName.empty() /*not moved from*/ && !fOutputFile /* did not run */ && fOptions.fLazy)
1560 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1561 }
1562
1563 void InitTask(TTreeReader *r, unsigned int /* slot */)
1564 {
1565 if (r)
1566 fInputTree = r->GetTree();
1567 fBranchAddressesNeedReset = true;
1568 }
1569
1570 void Exec(unsigned int /* slot */, ColTypes &... values)
1571 {
1572 using ind_t = std::index_sequence_for<ColTypes...>;
1573 if (!fBranchAddressesNeedReset) {
1574 UpdateCArraysPtrs(values..., ind_t{});
1575 } else {
1576 SetBranches(values..., ind_t{});
1577 fBranchAddressesNeedReset = false;
1578 }
1579 fOutputTree->Fill();
1580 }
1581
1582 template <std::size_t... S>
1583 void UpdateCArraysPtrs(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1584 {
1585 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1586 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1587 // leaving associated to the branch of the output tree an invalid pointer.
1588 // With this code, we set the value of the pointer in the output branch anew when needed.
1589 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1590 // we need an int for the expander list.
1591 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1592 ? fBranches[S]->SetAddress(GetData(values)),
1593 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1594 0};
1595 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1596 }
1597
1598 template <std::size_t... S>
1599 void SetBranches(ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1600 {
1601 // create branches in output tree
1602 int expander[] = {(SetBranchesHelper(fInputTree, *fOutputTree, fInputBranchNames[S], fOutputBranchNames[S],
1603 fBranches[S], fBranchAddresses[S], &values, fOutputBranches, fIsDefine[S]),
1604 0)...,
1605 0};
1606 fOutputBranches.AssertNoNullBranchAddresses();
1607 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1608 }
1609
1610 void Initialize()
1611 {
1612 fOutputFile.reset(
1613 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1615 if(!fOutputFile)
1616 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1617
1618 TDirectory *outputDir = fOutputFile.get();
1619 if (!fDirName.empty()) {
1620 TString checkupdate = fOptions.fMode;
1621 checkupdate.ToLower();
1622 if (checkupdate == "update")
1623 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
1624 else
1625 outputDir = fOutputFile->mkdir(fDirName.c_str());
1626 }
1627
1628 fOutputTree =
1629 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
1630
1631 if (fOptions.fAutoFlush)
1632 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1633 }
1634
1635 void Finalize()
1636 {
1637 assert(fOutputTree != nullptr);
1638 assert(fOutputFile != nullptr);
1639
1640 // use AutoSave to flush TTree contents because TTree::Write writes in gDirectory, not in fDirectory
1641 fOutputTree->AutoSave("flushbaskets");
1642 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1643 fOutputTree.reset();
1644 fOutputFile->Close();
1645 }
1646
1647 std::string GetActionName() { return "Snapshot"; }
1648
1649 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1650 {
1651 return [this](unsigned int, const RSampleInfo &) mutable { fBranchAddressesNeedReset = true; };
1652 }
1653};
1654
1655/// Helper object for a multi-thread Snapshot action
1656template <typename... ColTypes>
1657class R__CLING_PTRCHECK(off) SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<ColTypes...>> {
1658 unsigned int fNSlots;
1659 std::unique_ptr<ROOT::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1660 std::vector<std::shared_ptr<ROOT::TBufferMergerFile>> fOutputFiles;
1661 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1662 std::vector<int> fBranchAddressesNeedReset; // vector<bool> does not allow concurrent writing of different elements
1663 std::string fFileName; // name of the output file name
1664 std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1665 std::string fTreeName; // name of output tree
1666 RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1667 ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1668 ColumnNames_t fOutputBranchNames;
1669 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1670 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1671 std::vector<std::vector<TBranch *>> fBranches;
1672 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1673 std::vector<std::vector<void *>> fBranchAddresses;
1674 std::vector<RBranchSet> fOutputBranches;
1675 std::vector<bool> fIsDefine;
1676
1677public:
1678 using ColumnTypes_t = TypeList<ColTypes...>;
1679 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1680 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1681 const RSnapshotOptions &options, std::vector<bool> &&isDefine)
1682 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fBranchAddressesNeedReset(fNSlots, 1),
1683 fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1684 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots),
1685 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1686 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr)), fOutputBranches(fNSlots),
1687 fIsDefine(std::move(isDefine))
1688 {
1689 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1690 }
1691 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1692 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1693 ~SnapshotHelperMT()
1694 {
1695 if (!fTreeName.empty() /*not moved from*/ && fOptions.fLazy &&
1696 std::all_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &f) { return !f; }) /* never run */)
1697 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1698 }
1699
1700 void InitTask(TTreeReader *r, unsigned int slot)
1701 {
1702 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1703 if (!fOutputFiles[slot]) {
1704 // first time this thread executes something, let's create a TBufferMerger output directory
1705 fOutputFiles[slot] = fMerger->GetFile();
1706 }
1707 TDirectory *treeDirectory = fOutputFiles[slot].get();
1708 if (!fDirName.empty()) {
1709 // call returnExistingDirectory=true since MT can end up making this call multiple times
1710 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1711 }
1712 // re-create output tree as we need to create its branches again, with new input variables
1713 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1714 fOutputTrees[slot] =
1715 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1716 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
1717 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1718 fOutputTrees[slot]->SetImplicitMT(false);
1719 if (fOptions.fAutoFlush)
1720 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1721 if (r) {
1722 // not an empty-source RDF
1723 fInputTrees[slot] = r->GetTree();
1724 }
1725 fBranchAddressesNeedReset[slot] = 1; // reset first event flag for this slot
1726 }
1727
1728 void FinalizeTask(unsigned int slot)
1729 {
1730 if (fOutputTrees[slot]->GetEntries() > 0)
1731 fOutputFiles[slot]->Write();
1732 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1733 fOutputTrees[slot].reset(nullptr);
1734 fOutputBranches[slot].Clear();
1735 }
1736
1737 void Exec(unsigned int slot, ColTypes &... values)
1738 {
1739 using ind_t = std::index_sequence_for<ColTypes...>;
1740 if (fBranchAddressesNeedReset[slot] == 0) {
1741 UpdateCArraysPtrs(slot, values..., ind_t{});
1742 } else {
1743 SetBranches(slot, values..., ind_t{});
1744 fBranchAddressesNeedReset[slot] = 0;
1745 }
1746 fOutputTrees[slot]->Fill();
1747 auto entries = fOutputTrees[slot]->GetEntries();
1748 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1749 if ((autoFlush > 0) && (entries % autoFlush == 0))
1750 fOutputFiles[slot]->Write();
1751 }
1752
1753 template <std::size_t... S>
1754 void UpdateCArraysPtrs(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1755 {
1756 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1757 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1758 // leaving associated to the branch of the output tree an invalid pointer.
1759 // With this code, we set the value of the pointer in the output branch anew when needed.
1760 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1761 // we need an int for the expander list.
1762 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1763 ? fBranches[slot][S]->SetAddress(GetData(values)),
1764 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1765 0};
1766 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1767 }
1768
1769 template <std::size_t... S>
1770 void SetBranches(unsigned int slot, ColTypes &... values, std::index_sequence<S...> /*dummy*/)
1771 {
1772 // hack to call TTree::Branch on all variadic template arguments
1773 int expander[] = {(SetBranchesHelper(fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1774 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S],
1775 &values, fOutputBranches[slot], fIsDefine[S]),
1776 0)...,
1777 0};
1778 fOutputBranches[slot].AssertNoNullBranchAddresses();
1779 (void)expander; // avoid unused parameter warnings (gcc 12.1)
1780 }
1781
1782 void Initialize()
1783 {
1784 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1785 auto out_file = TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/fFileName.c_str(), cs);
1786 if(!out_file)
1787 throw std::runtime_error("Snapshot: could not create output file " + fFileName);
1788 fMerger = std::make_unique<ROOT::TBufferMerger>(std::unique_ptr<TFile>(out_file));
1789 }
1790
1791 void Finalize()
1792 {
1793 assert(std::any_of(fOutputFiles.begin(), fOutputFiles.end(), [](const auto &ptr) { return ptr != nullptr; }));
1794
1795 auto fileWritten = false;
1796 for (auto &file : fOutputFiles) {
1797 if (file) {
1798 file->Write();
1799 file->Close();
1800 fileWritten = true;
1801 }
1802 }
1803
1804 if (!fileWritten) {
1805 Warning("Snapshot",
1806 "No input entries (input TTree was empty or no entry passed the Filters). Output TTree is empty.");
1807 }
1808
1809 // flush all buffers to disk by destroying the TBufferMerger
1810 fOutputFiles.clear();
1811 fMerger.reset();
1812 }
1813
1814 std::string GetActionName() { return "Snapshot"; }
1815
1816 ROOT::RDF::SampleCallback_t GetSampleCallback() final
1817 {
1818 return [this](unsigned int slot, const RSampleInfo &) mutable { fBranchAddressesNeedReset[slot] = 1; };
1819 }
1820};
1821
1822template <typename Acc, typename Merge, typename R, typename T, typename U,
1823 bool MustCopyAssign = std::is_same<R, U>::value>
1824class R__CLING_PTRCHECK(off) AggregateHelper
1825 : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1826 Acc fAggregate;
1827 Merge fMerge;
1828 std::shared_ptr<U> fResult;
1829 Results<U> fAggregators;
1830
1831public:
1832 using ColumnTypes_t = TypeList<T>;
1833
1834 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1835 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1836 {
1837 }
1838
1839 AggregateHelper(Acc &f, Merge &m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1840 : fAggregate(f), fMerge(m), fResult(result), fAggregators(nSlots, *result)
1841 {
1842 }
1843
1844 AggregateHelper(AggregateHelper &&) = default;
1845 AggregateHelper(const AggregateHelper &) = delete;
1846
1847 void InitTask(TTreeReader *, unsigned int) {}
1848
1849 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<MustCopyAssign_, int> = 0>
1850 void Exec(unsigned int slot, const T &value)
1851 {
1852 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1853 }
1854
1855 template <bool MustCopyAssign_ = MustCopyAssign, std::enable_if_t<!MustCopyAssign_, int> = 0>
1856 void Exec(unsigned int slot, const T &value)
1857 {
1858 fAggregate(fAggregators[slot], value);
1859 }
1860
1861 void Initialize() { /* noop */}
1862
1863 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1864 bool MergeAll = std::is_same<void, MergeRet>::value>
1865 std::enable_if_t<MergeAll, void> Finalize()
1866 {
1867 fMerge(fAggregators);
1868 *fResult = fAggregators[0];
1869 }
1870
1871 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1872 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1873 std::enable_if_t<MergeTwoByTwo, void> Finalize(...) // ... needed to let compiler distinguish overloads
1874 {
1875 for (const auto &acc : fAggregators)
1876 *fResult = fMerge(*fResult, acc);
1877 }
1878
1879 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1880
1881 std::string GetActionName() { return "Aggregate"; }
1882
1883 AggregateHelper MakeNew(void *newResult)
1884 {
1885 auto &result = *static_cast<std::shared_ptr<U> *>(newResult);
1886 return AggregateHelper(fAggregate, fMerge, result, fAggregators.size());
1887 }
1888};
1889
1890} // end of NS RDF
1891} // end of NS Internal
1892} // end of NS ROOT
1893
1894/// \endcond
1895
1896#endif
PyObject * fCallable
Handle_t Display_t
Display handle.
Definition GuiTypes.h:27
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
#define g(i)
Definition RSha256.hxx:105
#define h(i)
Definition RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition RSha256.hxx:110
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
double Double_t
Definition RtypesCore.h:59
unsigned long long ULong64_t
Definition RtypesCore.h:81
#define R__CLING_PTRCHECK(ONOFF)
Definition Rtypes.h:500
void Warning(const char *location, const char *msgfmt,...)
Use this function in warning situations.
Definition TError.cxx:241
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char filename
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char x1
char name[80]
Definition TGX11.cxx:110
TClass * IsA() const override
Definition TStringLong.h:20
TTime operator*(const TTime &t1, const TTime &t2)
Definition TTime.h:85
Base class for action helpers, see RInterface::Book() for more information.
pointer data() noexcept
Return a pointer to the vector's buffer, even if empty().
Definition RVec.hxx:280
This class is the textual representation of the content of a columnar dataset.
Definition RDisplay.hxx:64
This type represents a sample identifier, to be used in conjunction with RDataFrame features such as ...
typename RemoveFirstParameter< T >::type RemoveFirstParameter_t
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition RVec.hxx:1480
A TTree is a list of TBranches.
Definition TBranch.h:89
virtual const char * GetClassName() const
Return the name of the user class whose content is stored in this branch, if any.
Definition TBranch.cxx:1324
virtual char * GetAddress() const
Definition TBranch.h:208
static TClass * Class()
Int_t GetSplitLevel() const
Definition TBranch.h:246
TClass * IsA() const override
Definition TBranch.h:291
virtual Int_t GetBasketSize() const
Definition TBranch.h:213
TObjArray * GetListOfLeaves()
Definition TBranch.h:243
TClassRef is used to implement a permanent reference to a TClass object.
Definition TClassRef.h:28
Collection abstract base class.
Definition TCollection.h:65
TDirectory::TContext keeps track and restore the current directory.
Definition TDirectory.h:89
Describe directory structure in memory.
Definition TDirectory.h:45
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4053
TGraph with asymmetric error bars.
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)}
Definition TH1.h:620
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:58
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition TLeaf.h:57
A doubly linked list.
Definition TList.h:38
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition TNamed.cxx:164
const char * GetName() const override
Returns name of object.
Definition TNamed.h:47
const char * GetTitle() const override
Returns title of object.
Definition TNamed.h:48
TObject * UncheckedAt(Int_t i) const
Definition TObjArray.h:84
Statistical variable, defined by its mean and variance (RMS).
Definition TStatistic.h:33
Basic string class.
Definition TString.h:139
void ToLower()
Change string to lower-case.
Definition TString.cxx:1170
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition TTreeReader.h:44
A TTree represents a columnar dataset.
Definition TTree.h:79
virtual TBranch * FindBranch(const char *name)
Return the branch that correspond to the path 'branchname', which can include the name of the tree or...
Definition TTree.cxx:4832
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition TTree.cxx:5285
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition TTree.h:350
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition TTree.h:258
RooCmdArg Columns(Int_t ncol)
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
#define F(x, y, z)
#define H(x, y, z)
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition API.cxx:341
std::unique_ptr< RMergeableVariations< T > > GetMergeableValue(ROOT::RDF::Experimental::RResultMap< T > &rmap)
Retrieve mergeable values after calling ROOT::RDF::VariationsFor .
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition RDFUtils.cxx:296
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition RDFUtils.cxx:252
constexpr std::size_t FindIdxTrue(const T &arr)
Definition Utils.hxx:229
void(off) SmallVectorTemplateBase< T
double T(double x)
std::vector< std::string > ColumnNames_t
std::function< void(unsigned int, const ROOT::RDF::RSampleInfo &)> SampleCallback_t
The type of a data-block callback, registered with a RDataFrame computation graph via e....
ROOT type_traits extensions.
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
@ kROOTRVec
Definition ESTLType.h:46
@ kSTLvector
Definition ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm::EValues algorithm, int compressionLevel)
RooArgSet S(Args_t &&... args)
Definition RooArgSet.h:232
ROOT::ESTLType STLKind(std::string_view type)
Converts STL container name to number.
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition tmvaglob.cxx:176
Definition file.py:1
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
bool fLazy
Do not start the event loop when Snapshot is called.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
TMarker m
Definition textangle.C:8
TLine l
Definition textangle.C:4
static uint64_t sum(uint64_t i)
Definition Factory.cxx:2345