Logo ROOT   6.16/01
Reference Guide
ActionHelpers.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDFOPERATIONS
12#define ROOT_RDFOPERATIONS
13
14#include <algorithm>
15#include <limits>
16#include <memory>
17#include <stdexcept>
18#include <string>
19#include <type_traits>
20#include <vector>
21#include <iomanip>
22
23#include "Compression.h"
25#include "ROOT/RStringView.hxx"
26#include "ROOT/RVec.hxx"
27#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
29#include "ROOT/RDF/Utils.hxx"
30#include "ROOT/RMakeUnique.hxx"
32#include "ROOT/TypeTraits.hxx"
33#include "ROOT/RDF/RDisplay.hxx"
34#include "RtypesCore.h"
35#include "TBranch.h"
36#include "TClassEdit.h"
37#include "TDirectory.h"
38#include "TFile.h" // for SnapshotHelper
39#include "TH1.h"
40#include "TGraph.h"
41#include "TLeaf.h"
42#include "TObjArray.h"
43#include "TObject.h"
44#include "TTree.h"
45#include "TTreeReader.h" // for SnapshotHelper
46
47/// \cond HIDDEN_SYMBOLS
48
49namespace ROOT {
50namespace Detail {
51namespace RDF {
/// CRTP base class of the action helpers.
/// It offers CallFinalizeTask, which forwards to Helper::FinalizeTask when the helper declares such a member
/// and silently does nothing otherwise.
template <typename Helper>
class RActionImpl {
public:
   /// Selected (via expression SFINAE on &Derived::FinalizeTask) only when the helper has a FinalizeTask member.
   /// \param slot Forwarded unchanged to Helper::FinalizeTask.
   template <typename Derived = Helper>
   auto CallFinalizeTask(unsigned int slot) -> decltype(&Derived::FinalizeTask, void())
   {
      auto *self = static_cast<Helper *>(this);
      self->FinalizeTask(slot);
   }

   /// Fallback overload: the helper has no FinalizeTask, so there is nothing to do.
   template <typename... Ignored>
   void CallFinalizeTask(unsigned int, Ignored...)
   {
   }
};
66
67} // namespace RDF
68} // namespace Detail
69
70namespace Internal {
71namespace RDF {
72using namespace ROOT::TypeTraits;
73using namespace ROOT::VecOps;
74using namespace ROOT::RDF;
75using namespace ROOT::Detail::RDF;
76
77using Hist_t = ::TH1D;
78
/// The container type for each thread's partial result in an action helper
// std::vector<bool> must never be instantiated here: with it, it would be impossible to return a reference to one
// of the thread-local results, so std::deque is selected for bool instead. Having one common alias for the
// container type also makes it easy to swap the underlying container later, e.g. if false sharing between the
// thread-local results turns out to be a problem.
template <typename T>
using Results = typename std::conditional<!std::is_same<T, bool>::value, std::vector<T>, std::deque<T>>::type;
85
86template <typename F>
87class ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
88 F fCallable;
89
90public:
91 using ColumnTypes_t = RemoveFirstParameter_t<typename CallableTraits<F>::arg_types>;
92 ForeachSlotHelper(F &&f) : fCallable(f) {}
93 ForeachSlotHelper(ForeachSlotHelper &&) = default;
94 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
95
96 void InitTask(TTreeReader *, unsigned int) {}
97
98 template <typename... Args>
99 void Exec(unsigned int slot, Args &&... args)
100 {
101 // check that the decayed types of Args are the same as the branch types
102 static_assert(std::is_same<TypeList<typename std::decay<Args>::type...>, ColumnTypes_t>::value, "");
103 fCallable(slot, std::forward<Args>(args)...);
104 }
105
106 void Initialize() { /* noop */}
107
108 void Finalize() { /* noop */}
109
110 std::string GetActionName() { return "ForeachSlot"; }
111};
112
113class CountHelper : public RActionImpl<CountHelper> {
114 const std::shared_ptr<ULong64_t> fResultCount;
115 Results<ULong64_t> fCounts;
116
117public:
118 using ColumnTypes_t = TypeList<>;
119 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
120 CountHelper(CountHelper &&) = default;
121 CountHelper(const CountHelper &) = delete;
122 void InitTask(TTreeReader *, unsigned int) {}
123 void Exec(unsigned int slot);
124 void Initialize() { /* noop */}
125 void Finalize();
126 ULong64_t &PartialUpdate(unsigned int slot);
127
128 std::string GetActionName() { return "Count"; }
129};
130
131template <typename ProxiedVal_t>
132class ReportHelper : public RActionImpl<ReportHelper<ProxiedVal_t>> {
133 const std::shared_ptr<RCutFlowReport> fReport;
134 // Here we have a weak pointer since we need to keep track of the validity
135 // of the proxied node. It can happen that the user does not trigger the
136 // event loop by looking into the RResultPtr and the chain goes out of scope
137 // before the Finalize method is invoked.
138 std::weak_ptr<ProxiedVal_t> fProxiedWPtr;
139 bool fReturnEmptyReport;
140
141public:
142 using ColumnTypes_t = TypeList<>;
143 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, const std::shared_ptr<ProxiedVal_t> &pp, bool emptyRep)
144 : fReport(report), fProxiedWPtr(pp), fReturnEmptyReport(emptyRep){};
145 ReportHelper(ReportHelper &&) = default;
146 ReportHelper(const ReportHelper &) = delete;
147 void InitTask(TTreeReader *, unsigned int) {}
148 void Exec(unsigned int /* slot */) {}
149 void Initialize() { /* noop */}
150 void Finalize()
151 {
152 // We need the weak_ptr in order to avoid crashes at tear down
153 if (!fReturnEmptyReport && !fProxiedWPtr.expired())
154 fProxiedWPtr.lock()->Report(*fReport);
155 }
156
157 std::string GetActionName() { return "Report"; }
158};
159
160class FillHelper : public RActionImpl<FillHelper> {
161 // this sets a total initial size of 16 MB for the buffers (can increase)
162 static constexpr unsigned int fgTotalBufSize = 2097152;
163 using BufEl_t = double;
164 using Buf_t = std::vector<BufEl_t>;
165
166 std::vector<Buf_t> fBuffers;
167 std::vector<Buf_t> fWBuffers;
168 const std::shared_ptr<Hist_t> fResultHist;
169 unsigned int fNSlots;
170 unsigned int fBufSize;
171 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
172 Results<std::unique_ptr<Hist_t>> fPartialHists;
173 Buf_t fMin;
174 Buf_t fMax;
175
176 void UpdateMinMax(unsigned int slot, double v);
177
178public:
179 FillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
180 FillHelper(FillHelper &&) = default;
181 FillHelper(const FillHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot, double v);
184 void Exec(unsigned int slot, double v, double w);
185
186 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
187 void Exec(unsigned int slot, const T &vs)
188 {
189 auto &thisBuf = fBuffers[slot];
190 for (auto &v : vs) {
191 UpdateMinMax(slot, v);
192 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
193 }
194 }
195
196 template <typename T, typename W,
197 typename std::enable_if<IsContainer<T>::value && IsContainer<W>::value, int>::type = 0>
198 void Exec(unsigned int slot, const T &vs, const W &ws)
199 {
200 auto &thisBuf = fBuffers[slot];
201
202 for (auto &v : vs) {
203 UpdateMinMax(slot, v);
204 thisBuf.emplace_back(v);
205 }
206
207 auto &thisWBuf = fWBuffers[slot];
208 for (auto &w : ws) {
209 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
210 }
211 }
212
213 template <typename T, typename W,
214 typename std::enable_if<IsContainer<T>::value && !IsContainer<W>::value, int>::type = 0>
215 void Exec(unsigned int slot, const T &vs, const W w)
216 {
217 auto &thisBuf = fBuffers[slot];
218 for (auto &v : vs) {
219 UpdateMinMax(slot, v);
220 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
221 }
222
223 auto &thisWBuf = fWBuffers[slot];
224 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
225 }
226
227 Hist_t &PartialUpdate(unsigned int);
228
229 void Initialize() { /* noop */}
230
231 void Finalize();
232
233 std::string GetActionName() { return "Fill"; }
234};
235
236extern template void FillHelper::Exec(unsigned int, const std::vector<float> &);
237extern template void FillHelper::Exec(unsigned int, const std::vector<double> &);
238extern template void FillHelper::Exec(unsigned int, const std::vector<char> &);
239extern template void FillHelper::Exec(unsigned int, const std::vector<int> &);
240extern template void FillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
241extern template void FillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
242extern template void FillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
243extern template void FillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
244extern template void FillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
245extern template void
246FillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
247
248template <typename HIST = Hist_t>
249class FillParHelper : public RActionImpl<FillParHelper<HIST>> {
250 std::vector<HIST *> fObjects;
251
252public:
253 FillParHelper(FillParHelper &&) = default;
254 FillParHelper(const FillParHelper &) = delete;
255
256 FillParHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
257 {
258 fObjects[0] = h.get();
259 // Initialise all other slots
260 for (unsigned int i = 1; i < nSlots; ++i) {
261 fObjects[i] = new HIST(*fObjects[0]);
262 fObjects[i]->SetDirectory(nullptr);
263 }
264 }
265
266 void InitTask(TTreeReader *, unsigned int) {}
267
268 void Exec(unsigned int slot, double x0) // 1D histos
269 {
270 fObjects[slot]->Fill(x0);
271 }
272
273 void Exec(unsigned int slot, double x0, double x1) // 1D weighted and 2D histos
274 {
275 fObjects[slot]->Fill(x0, x1);
276 }
277
278 void Exec(unsigned int slot, double x0, double x1, double x2) // 2D weighted and 3D histos
279 {
280 fObjects[slot]->Fill(x0, x1, x2);
281 }
282
283 void Exec(unsigned int slot, double x0, double x1, double x2, double x3) // 3D weighted histos
284 {
285 fObjects[slot]->Fill(x0, x1, x2, x3);
286 }
287
288 template <typename X0, typename std::enable_if<IsContainer<X0>::value, int>::type = 0>
289 void Exec(unsigned int slot, const X0 &x0s)
290 {
291 auto thisSlotH = fObjects[slot];
292 for (auto &x0 : x0s) {
293 thisSlotH->Fill(x0); // TODO: Can be optimised in case T == vector<double>
294 }
295 }
296
297 template <typename X0, typename X1,
298 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value, int>::type = 0>
299 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
300 {
301 auto thisSlotH = fObjects[slot];
302 if (x0s.size() != x1s.size()) {
303 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
304 }
305 auto x0sIt = std::begin(x0s);
306 const auto x0sEnd = std::end(x0s);
307 auto x1sIt = std::begin(x1s);
308 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
309 thisSlotH->Fill(*x0sIt, *x1sIt); // TODO: Can be optimised in case T == vector<double>
310 }
311 }
312
313 template <typename X0, typename W,
314 typename std::enable_if<IsContainer<X0>::value && !IsContainer<W>::value, int>::type = 0>
315 void Exec(unsigned int slot, const X0 &x0s, const W w)
316 {
317 auto thisSlotH = fObjects[slot];
318 for (auto &&x : x0s) {
319 thisSlotH->Fill(x, w);
320 }
321 }
322
323 template <typename X0, typename X1, typename X2,
324 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value,
325 int>::type = 0>
326 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s)
327 {
328 auto thisSlotH = fObjects[slot];
329 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
330 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
331 }
332 auto x0sIt = std::begin(x0s);
333 const auto x0sEnd = std::end(x0s);
334 auto x1sIt = std::begin(x1s);
335 auto x2sIt = std::begin(x2s);
336 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
337 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt); // TODO: Can be optimised in case T == vector<double>
338 }
339 }
340
341 template <typename X0, typename X1, typename W,
342 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && !IsContainer<W>::value,
343 int>::type = 0>
344 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const W w)
345 {
346 auto thisSlotH = fObjects[slot];
347 if (x0s.size() != x1s.size()) {
348 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
349 }
350 auto x0sIt = std::begin(x0s);
351 const auto x0sEnd = std::end(x0s);
352 auto x1sIt = std::begin(x1s);
353 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
354 thisSlotH->Fill(*x0sIt, *x1sIt, w); // TODO: Can be optimised in case T == vector<double>
355 }
356 }
357
358 template <typename X0, typename X1, typename X2, typename X3,
359 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value &&
361 int>::type = 0>
362 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const X3 &x3s)
363 {
364 auto thisSlotH = fObjects[slot];
365 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size() && x1s.size() == x3s.size())) {
366 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
367 }
368 auto x0sIt = std::begin(x0s);
369 const auto x0sEnd = std::end(x0s);
370 auto x1sIt = std::begin(x1s);
371 auto x2sIt = std::begin(x2s);
372 auto x3sIt = std::begin(x3s);
373 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++, x3sIt++) {
374 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, *x3sIt); // TODO: Can be optimised in case T == vector<double>
375 }
376 }
377
378 template <typename X0, typename X1, typename X2, typename W,
379 typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value &&
381 int>::type = 0>
382 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const W w)
383 {
384 auto thisSlotH = fObjects[slot];
385 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
386 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
387 }
388 auto x0sIt = std::begin(x0s);
389 const auto x0sEnd = std::end(x0s);
390 auto x1sIt = std::begin(x1s);
391 auto x2sIt = std::begin(x2s);
392 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
393 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, w);
394 }
395 }
396
397 void Initialize() { /* noop */}
398
399 void Finalize()
400 {
401 auto resObj = fObjects[0];
402 const auto nSlots = fObjects.size();
403 TList l;
404 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
405 for (unsigned int slot = 1; slot < nSlots; ++slot) {
406 l.Add(fObjects[slot]);
407 }
408
409 resObj->Merge(&l);
410 }
411
412 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
413
414 std::string GetActionName() { return "FillPar"; }
415};
416
417class FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
418public:
419 using Result_t = ::TGraph;
420
421private:
422 std::vector<::TGraph *> fGraphs;
423
424public:
425 FillTGraphHelper(FillTGraphHelper &&) = default;
426 FillTGraphHelper(const FillTGraphHelper &) = delete;
427
428 // The last parameter is always false, as at the moment there is no way to propagate the parameter from the user to
429 // this method
430 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
431 {
432 fGraphs[0] = g.get();
433 // Initialise all other slots
434 for (unsigned int i = 1; i < nSlots; ++i) {
435 fGraphs[i] = new TGraph(*fGraphs[0]);
436 }
437 }
438
439 void Initialize() {}
440 void InitTask(TTreeReader *, unsigned int) {}
441
442 template <typename X0, typename X1,
443 typename std::enable_if<
445 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
446 {
447 if (x0s.size() != x1s.size()) {
448 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
449 }
450 auto thisSlotG = fGraphs[slot];
451 auto x0sIt = std::begin(x0s);
452 const auto x0sEnd = std::end(x0s);
453 auto x1sIt = std::begin(x1s);
454 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
455 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
456 }
457 }
458
459 template <typename X0, typename X1>
460 void Exec(unsigned int slot, X0 x0, X1 x1)
461 {
462 auto thisSlotG = fGraphs[slot];
463 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
464 }
465
466 void Finalize()
467 {
468 const auto nSlots = fGraphs.size();
469 auto resGraph = fGraphs[0];
470 TList l;
471 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
472 for (unsigned int slot = 1; slot < nSlots; ++slot) {
473 l.Add(fGraphs[slot]);
474 }
475 resGraph->Merge(&l);
476 }
477
478 std::string GetActionName() { return "Graph"; }
479
480 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
481};
482
483// In case of the take helper we have 4 cases:
484// 1. The column is not an RVec, the collection is not a vector
485// 2. The column is not an RVec, the collection is a vector
486// 3. The column is an RVec, the collection is not a vector
487// 4. The column is an RVec, the collection is a vector
488
489// Case 1.: The column is not an RVec, the collection is not a vector
490// No optimisations, no transformations: just copies.
491template <typename RealT_t, typename T, typename COLL>
492class TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
493 Results<std::shared_ptr<COLL>> fColls;
494
495public:
496 using ColumnTypes_t = TypeList<T>;
497 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
498 {
499 fColls.emplace_back(resultColl);
500 for (unsigned int i = 1; i < nSlots; ++i)
501 fColls.emplace_back(std::make_shared<COLL>());
502 }
503 TakeHelper(TakeHelper &&) = default;
504 TakeHelper(const TakeHelper &) = delete;
505
506 void InitTask(TTreeReader *, unsigned int) {}
507
508 void Exec(unsigned int slot, T &v) { fColls[slot]->emplace_back(v); }
509
510 void Initialize() { /* noop */}
511
512 void Finalize()
513 {
514 auto rColl = fColls[0];
515 for (unsigned int i = 1; i < fColls.size(); ++i) {
516 auto &coll = fColls[i];
517 for (T &v : *coll) {
518 rColl->emplace_back(v);
519 }
520 }
521 }
522
523 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
524
525 std::string GetActionName() { return "Take"; }
526};
527
528// Case 2.: The column is not an RVec, the collection is a vector
529// Optimisations, no transformations: just copies.
530template <typename RealT_t, typename T>
531class TakeHelper<RealT_t, T, std::vector<T>> : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
532 Results<std::shared_ptr<std::vector<T>>> fColls;
533
534public:
535 using ColumnTypes_t = TypeList<T>;
536 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
537 {
538 fColls.emplace_back(resultColl);
539 for (unsigned int i = 1; i < nSlots; ++i) {
540 auto v = std::make_shared<std::vector<T>>();
541 v->reserve(1024);
542 fColls.emplace_back(v);
543 }
544 }
545 TakeHelper(TakeHelper &&) = default;
546 TakeHelper(const TakeHelper &) = delete;
547
548 void InitTask(TTreeReader *, unsigned int) {}
549
550 void Exec(unsigned int slot, T &v) { fColls[slot]->emplace_back(v); }
551
552 void Initialize() { /* noop */}
553
554 // This is optimised to treat vectors
555 void Finalize()
556 {
557 ULong64_t totSize = 0;
558 for (auto &coll : fColls)
559 totSize += coll->size();
560 auto rColl = fColls[0];
561 rColl->reserve(totSize);
562 for (unsigned int i = 1; i < fColls.size(); ++i) {
563 auto &coll = fColls[i];
564 rColl->insert(rColl->end(), coll->begin(), coll->end());
565 }
566 }
567
568 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
569
570 std::string GetActionName() { return "Take"; }
571};
572
573// Case 3.: The column is a RVec, the collection is not a vector
574// No optimisations, transformations from RVecs to vectors
575template <typename RealT_t, typename COLL>
576class TakeHelper<RealT_t, RVec<RealT_t>, COLL> : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
577 Results<std::shared_ptr<COLL>> fColls;
578
579public:
580 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
581 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
582 {
583 fColls.emplace_back(resultColl);
584 for (unsigned int i = 1; i < nSlots; ++i)
585 fColls.emplace_back(std::make_shared<COLL>());
586 }
587 TakeHelper(TakeHelper &&) = default;
588 TakeHelper(const TakeHelper &) = delete;
589
590 void InitTask(TTreeReader *, unsigned int) {}
591
592 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
593
594 void Initialize() { /* noop */}
595
596 void Finalize()
597 {
598 auto rColl = fColls[0];
599 for (unsigned int i = 1; i < fColls.size(); ++i) {
600 auto &coll = fColls[i];
601 for (auto &v : *coll) {
602 rColl->emplace_back(v);
603 }
604 }
605 }
606
607 std::string GetActionName() { return "Take"; }
608};
609
610// Case 4.: The column is an RVec, the collection is a vector
611// Optimisations, transformations from RVecs to vectors
612template <typename RealT_t>
613class TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
614 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
615
616 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
617
618public:
619 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
620 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
621 {
622 fColls.emplace_back(resultColl);
623 for (unsigned int i = 1; i < nSlots; ++i) {
624 auto v = std::make_shared<std::vector<RealT_t>>();
625 v->reserve(1024);
626 fColls.emplace_back(v);
627 }
628 }
629 TakeHelper(TakeHelper &&) = default;
630 TakeHelper(const TakeHelper &) = delete;
631
632 void InitTask(TTreeReader *, unsigned int) {}
633
634 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
635
636 void Initialize() { /* noop */}
637
638 // This is optimised to treat vectors
639 void Finalize()
640 {
641 ULong64_t totSize = 0;
642 for (auto &coll : fColls)
643 totSize += coll->size();
644 auto rColl = fColls[0];
645 rColl->reserve(totSize);
646 for (unsigned int i = 1; i < fColls.size(); ++i) {
647 auto &coll = fColls[i];
648 rColl->insert(rColl->end(), coll->begin(), coll->end());
649 }
650 }
651
652 std::string GetActionName() { return "Take"; }
653};
654
655template <typename ResultType>
656class MinHelper : public RActionImpl<MinHelper<ResultType>> {
657 const std::shared_ptr<ResultType> fResultMin;
658 Results<ResultType> fMins;
659
660public:
661 MinHelper(MinHelper &&) = default;
662 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
663 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
664 {
665 }
666
667 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
668
669 void InitTask(TTreeReader *, unsigned int) {}
670
671 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
672 void Exec(unsigned int slot, const T &vs)
673 {
674 for (auto &&v : vs)
675 fMins[slot] = std::min(v, fMins[slot]);
676 }
677
678 void Initialize() { /* noop */}
679
680 void Finalize()
681 {
682 *fResultMin = std::numeric_limits<ResultType>::max();
683 for (auto &m : fMins)
684 *fResultMin = std::min(m, *fResultMin);
685 }
686
687 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
688
689 std::string GetActionName() { return "Min"; }
690};
691
692// TODO
693// extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
694// extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
695// extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
696// extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
697// extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
698
699template <typename ResultType>
700class MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
701 const std::shared_ptr<ResultType> fResultMax;
702 Results<ResultType> fMaxs;
703
704public:
705 MaxHelper(MaxHelper &&) = default;
706 MaxHelper(const MaxHelper &) = delete;
707 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
708 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
709 {
710 }
711
712 void InitTask(TTreeReader *, unsigned int) {}
713 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
714
715 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
716 void Exec(unsigned int slot, const T &vs)
717 {
718 for (auto &&v : vs)
719 fMaxs[slot] = std::max((ResultType)v, fMaxs[slot]);
720 }
721
722 void Initialize() { /* noop */}
723
724 void Finalize()
725 {
726 *fResultMax = std::numeric_limits<ResultType>::lowest();
727 for (auto &m : fMaxs) {
728 *fResultMax = std::max(m, *fResultMax);
729 }
730 }
731
732 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
733
734 std::string GetActionName() { return "Max"; }
735};
736
737// TODO
738// extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
739// extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
740// extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
741// extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
742// extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
743
744template <typename ResultType>
745class SumHelper : public RActionImpl<SumHelper<ResultType>> {
746 const std::shared_ptr<ResultType> fResultSum;
747 Results<ResultType> fSums;
748
749 /// Evaluate neutral element for this type and the sum operation.
750 /// This is assumed to be any_value - any_value if operator- is defined
751 /// for the type, otherwise a default-constructed ResultType{} is used.
752 template <typename T = ResultType>
753 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
754 {
755 return v - v;
756 }
757
758 template <typename T = ResultType, typename Dummy = int>
759 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
760 {
761 return ResultType{};
762 }
763
764public:
765 SumHelper(SumHelper &&) = default;
766 SumHelper(const SumHelper &) = delete;
767 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
768 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1))
769 {
770 }
771
772 void InitTask(TTreeReader *, unsigned int) {}
773 void Exec(unsigned int slot, ResultType v) { fSums[slot] += v; }
774
775 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
776 void Exec(unsigned int slot, const T &vs)
777 {
778 for (auto &&v : vs)
779 fSums[slot] += static_cast<ResultType>(v);
780 }
781
782 void Initialize() { /* noop */}
783
784 void Finalize()
785 {
786 for (auto &m : fSums)
787 *fResultSum += m;
788 }
789
790 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
791
792 std::string GetActionName() { return "Sum"; }
793};
794
795class MeanHelper : public RActionImpl<MeanHelper> {
796 const std::shared_ptr<double> fResultMean;
797 std::vector<ULong64_t> fCounts;
798 std::vector<double> fSums;
799 std::vector<double> fPartialMeans;
800
801public:
802 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
803 MeanHelper(MeanHelper &&) = default;
804 MeanHelper(const MeanHelper &) = delete;
805 void InitTask(TTreeReader *, unsigned int) {}
806 void Exec(unsigned int slot, double v);
807
808 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
809 void Exec(unsigned int slot, const T &vs)
810 {
811 for (auto &&v : vs) {
812 fSums[slot] += v;
813 fCounts[slot]++;
814 }
815 }
816
817 void Initialize() { /* noop */}
818
819 void Finalize();
820
821 double &PartialUpdate(unsigned int slot);
822
823 std::string GetActionName() { return "Mean"; }
824};
825
826extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
827extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
828extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
829extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
830extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
831
832class StdDevHelper : public RActionImpl<StdDevHelper> {
833 // Number of subsets of data
834 const unsigned int fNSlots;
835 const std::shared_ptr<double> fResultStdDev;
836 // Number of element for each slot
837 std::vector<ULong64_t> fCounts;
838 // Mean of each slot
839 std::vector<double> fMeans;
840 // Squared distance from the mean
841 std::vector<double> fDistancesfromMean;
842
843public:
844 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
845 StdDevHelper(StdDevHelper &&) = default;
846 StdDevHelper(const StdDevHelper &) = delete;
847 void InitTask(TTreeReader *, unsigned int) {}
848 void Exec(unsigned int slot, double v);
849
850 template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
851 void Exec(unsigned int slot, const T &vs)
852 {
853 for (auto &&v : vs) {
854 Exec(slot, v);
855 }
856 }
857
858 void Initialize() { /* noop */}
859
860 void Finalize();
861
862 std::string GetActionName() { return "StdDev"; }
863};
864
865extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
866extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
867extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
868extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
869extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
870
871template <typename PrevNodeType>
872class DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
873private:
875 const std::shared_ptr<Display_t> fDisplayerHelper;
876 const std::shared_ptr<PrevNodeType> fPrevNode;
877
878public:
879 DisplayHelper(const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
880 : fDisplayerHelper(d), fPrevNode(prevNode)
881 {
882 }
883 DisplayHelper(DisplayHelper &&) = default;
884 DisplayHelper(const DisplayHelper &) = delete;
885 void InitTask(TTreeReader *, unsigned int) {}
886
887 template <typename... Columns>
888 void Exec(unsigned int, Columns... columns)
889 {
890 fDisplayerHelper->AddRow(columns...);
891 if (!fDisplayerHelper->HasNext()) {
892 fPrevNode->StopProcessing();
893 }
894 }
895
896 void Initialize() {}
897
898 void Finalize() {}
899
900 std::string GetActionName() { return "Display"; }
901};
902
903// std::vector<bool> is special, and not in a good way. As a consequence Snapshot of RVec<bool> needs to be treated
904// specially. In particular, if RVec<bool> is filled with a (fixed or variable size) boolean array coming from
905// a ROOT file, when writing out the correspinding branch from a Snapshot we do not have an address to set for the
906// TTree branch (std::vector<bool> and, consequently, RVec<bool> do not provide a `data()` method).
907// Bools is a lightweight wrapper around a C array of booleans that is meant to provide a stable address for the
908// output TTree to read the contents of the snapshotted branches at Fill time.
909class BoolArray {
910 std::size_t fSize = 0;
911 bool *fBools = nullptr;
912
913 bool *CopyVector(const RVec<bool> &v)
914 {
915 auto b = new bool[fSize];
916 std::copy(v.begin(), v.end(), b);
917 return b;
918 }
919
920 bool *CopyArray(bool *o, std::size_t size)
921 {
922 auto b = new bool[size];
923 for (auto i = 0u; i < size; ++i)
924 b[i] = o[i];
925 return b;
926 }
927
928public:
929 // this generic constructor could be replaced with a constexpr if in SetBranchesHelper
930 BoolArray() = default;
931 template <typename T>
932 BoolArray(const T &) { throw std::runtime_error("This constructor should never be called"); }
933 BoolArray(const RVec<bool> &v) : fSize(v.size()), fBools(CopyVector(v)) {}
934 BoolArray(const BoolArray &b)
935 {
936 CopyArray(b.fBools, b.fSize);
937 }
938 BoolArray &operator=(const BoolArray &b)
939 {
940 delete[] fBools;
941 CopyArray(b.fBools, b.fSize);
942 return *this;
943 }
944 BoolArray(BoolArray &&b)
945 {
946 fSize = b.fSize;
947 fBools = b.fBools;
948 b.fSize = 0;
949 b.fBools = nullptr;
950 }
951 BoolArray &operator=(BoolArray &&b)
952 {
953 delete[] fBools;
954 fSize = b.fSize;
955 fBools = b.fBools;
956 b.fSize = 0;
957 b.fBools = nullptr;
958 return *this;
959 }
960 ~BoolArray() { delete[] fBools; }
961 std::size_t Size() const { return fSize; }
962 bool *Data() { return fBools; }
963};
964using BoolArrayMap = std::map<std::string, BoolArray>;
965
966inline bool *UpdateBoolArrayIfBool(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName)
967{
968 // create a boolArrays entry
969 boolArrays[outName] = BoolArray(v);
970 return boolArrays[outName].Data();
971}
972
973template <typename T>
974T *UpdateBoolArrayIfBool(BoolArrayMap &, RVec<T> &v, const std::string &)
975{
976 return v.data();
977}
978
979// Helper which gets the return value of the data() method if the type is an
980// RVec (of anything but a bool), nullptr otherwise.
981inline void *GetData(ROOT::VecOps::RVec<bool> & /*v*/)
982{
983 return nullptr;
984}
985
986template <typename T>
987void *GetData(ROOT::VecOps::RVec<T> &v)
988{
989 return v.data();
990}
991
// Fallback overload for any type not matched by a more specific overload: always nullptr.
template <typename T>
void *GetData(T &)
{
   void *const noBuffer = nullptr;
   return noBuffer;
}
997
998
999template <typename T>
1000void SetBranchesHelper(BoolArrayMap &, TTree * /*inputTree*/, TTree &outputTree, const std::string & /*validName*/,
1001 const std::string &name, TBranch *& branch, void *& branchAddress, T *address)
1002{
1003 outputTree.Branch(name.c_str(), address);
1004 branch = nullptr;
1005 branchAddress = nullptr;
1006}
1007
1008/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1009/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1010/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch title
1011/// 2. RVecs coming from a custom column or a source
1012/// 3. vectors coming from ROOT files
1013/// In case of 1., we save the pointer to the branch and the pointer to the input value. In case of 2. and 3. we save
1014/// nullptrs.
1015template <typename T>
1016void SetBranchesHelper(BoolArrayMap &boolArrays, TTree *inputTree, TTree &outputTree, const std::string &inName,
1017 const std::string &outName, TBranch *&branch, void *&branchAddress, RVec<T> *ab)
1018{
1019 auto *const inputBranch = inputTree ? inputTree->GetBranch(inName.c_str()) : nullptr;
1020 const auto mustWriteStdVec =
1021 !inputBranch || ROOT::ESTLType::kSTLvector == TClassEdit::IsSTLCont(inputBranch->GetClassName());
1022
1023 if (mustWriteStdVec) {
1024 // Treat 2. and 3.:
1025 // 2. RVec coming from a custom column or a source
1026 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1027 outputTree.Branch(outName.c_str(), &ab->AsVector());
1028 return;
1029 }
1030
1031 // Treat 1, the C-array case
1032 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1033 const auto bname = leaf->GetName();
1034 const auto counterStr =
1035 leaf->GetLeafCount() ? std::string(leaf->GetLeafCount()->GetName()) : std::to_string(leaf->GetLenStatic());
1036 const auto btype = leaf->GetTypeName();
1037 const auto rootbtype = TypeName2ROOTTypeName(btype);
1038 const auto leaflist = std::string(bname) + "[" + counterStr + "]/" + rootbtype;
1039
1040 /// RVec<bool> is special because std::vector<bool> is special. In particular, it has no `data()`,
1041 /// so we need to explicitly manage storage of the data that the tree needs to Fill branches with.
1042 auto dataPtr = UpdateBoolArrayIfBool(boolArrays, *ab, outName);
1043
1044 auto *const outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1045 outputBranch->SetTitle(inputBranch->GetTitle());
1046
1047 // Record the branch ptr and the address associated to it if this is not a bool array
1048 if (!std::is_same<bool, T>::value) {
1049 branch = outputBranch;
1050 branchAddress = GetData(*ab);
1051 }
1052}
1053
1054// generic version, no-op
1055template <typename T>
1056void UpdateBoolArray(BoolArrayMap &, T&, const std::string &, TTree &) {}
1057
1058// RVec<bool> overload, update boolArrays if needed
1059inline void UpdateBoolArray(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName, TTree &t)
1060{
1061 if (v.size() > boolArrays[outName].Size()) {
1062 boolArrays[outName] = BoolArray(v); // resize and copy
1063 t.SetBranchAddress(outName.c_str(), boolArrays[outName].Data());
1064 }
1065 else {
1066 std::copy(v.begin(), v.end(), boolArrays[outName].Data()); // just copy
1067 }
1068}
1069
1070/// Helper object for a single-thread Snapshot action
1071template <typename... BranchTypes>
1072class SnapshotHelper : public RActionImpl<SnapshotHelper<BranchTypes...>> {
1073 const std::string fFileName;
1074 const std::string fDirName;
1075 const std::string fTreeName;
1076 const RSnapshotOptions fOptions;
1077 std::unique_ptr<TFile> fOutputFile;
1078 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1079 bool fIsFirstEvent{true};
1080 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1081 const ColumnNames_t fOutputBranchNames;
1082 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1083 BoolArrayMap fBoolArrays; // Storage for C arrays of bools to be written out
1084 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1085 std::vector<void *> fBranchAddresses; // Addresses associated to output branches, non-null only for the ones holding C arrays
1086
1087public:
1088 using ColumnTypes_t = TypeList<BranchTypes...>;
1089 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1090 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options)
1091 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1092 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1093 fBranchAddresses(vbnames.size(), nullptr)
1094 {
1095 }
1096
1097 SnapshotHelper(const SnapshotHelper &) = delete;
1098 SnapshotHelper(SnapshotHelper &&) = default;
1099
1100 void InitTask(TTreeReader *r, unsigned int /* slot */)
1101 {
1102 if (!r) // empty source, nothing to do
1103 return;
1104 fInputTree = r->GetTree();
1105 // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1106 // addresses of the branch values
1107 fInputTree->AddClone(fOutputTree.get());
1108 }
1109
1110 void Exec(unsigned int /* slot */, BranchTypes &... values)
1111 {
1112 using ind_t = std::index_sequence_for<BranchTypes...>;
1113 if (! fIsFirstEvent) {
1114 UpdateCArraysPtrs(values..., ind_t{});
1115 } else {
1116 SetBranches(values..., ind_t{});
1117 fIsFirstEvent = false;
1118 }
1119 UpdateBoolArrays(values..., ind_t{});
1120 fOutputTree->Fill();
1121 }
1122
1123 template <std::size_t... S>
1124 void UpdateCArraysPtrs(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1125 {
1126 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1127 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1128 // leaving associated to the branch of the output tree an invalid pointer.
1129 // With this code, we set the value of the pointer in the output branch anew when needed.
1130 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1131 // we need an int for the expander list.
1132 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1133 ? fBranches[S]->SetAddress(GetData(values)),
1134 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1135 0};
1136 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1137 }
1138
1139 template <std::size_t... S>
1140 void SetBranches(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1141 {
1142 // create branches in output tree (and fill fBoolArrays for RVec<bool> columns)
1143 int expander[] = {(SetBranchesHelper(fBoolArrays, fInputTree, *fOutputTree, fInputBranchNames[S],
1144 fOutputBranchNames[S], fBranches[S], fBranchAddresses[S], &values),
1145 0)...,
1146 0};
1147 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1148 }
1149
1150 template <std::size_t... S>
1151 void UpdateBoolArrays(BranchTypes &...values, std::index_sequence<S...> /*dummy*/)
1152 {
1153 int expander[] = {(UpdateBoolArray(fBoolArrays, values, fOutputBranchNames[S], *fOutputTree), 0)..., 0};
1154 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1155 }
1156
1157 void Initialize()
1158 {
1159 fOutputFile.reset(
1160 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1162
1163 if (!fDirName.empty()) {
1164 fOutputFile->mkdir(fDirName.c_str());
1165 fOutputFile->cd(fDirName.c_str());
1166 }
1167
1168 fOutputTree =
1169 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/fOutputFile.get());
1170
1171 if (fOptions.fAutoFlush)
1172 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1173 }
1174
1175 void Finalize()
1176 {
1177 if (fOutputFile && fOutputTree) {
1178 ::TDirectory::TContext ctxt(fOutputFile->GetDirectory(fDirName.c_str()));
1179 fOutputTree->Write();
1180 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1181 fOutputTree.reset();
1182 fOutputFile->Close();
1183 } else {
1184 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1185 }
1186 }
1187
1188 std::string GetActionName() { return "Snapshot"; }
1189};
1190
1191/// Helper object for a multi-thread Snapshot action
1192template <typename... BranchTypes>
1193class SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<BranchTypes...>> {
1194 const unsigned int fNSlots;
1195 std::unique_ptr<ROOT::Experimental::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1196 std::vector<std::shared_ptr<ROOT::Experimental::TBufferMergerFile>> fOutputFiles;
1197 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1198 std::vector<int> fIsFirstEvent; // vector<bool> does not allow concurrent writing of different elements
1199 const std::string fFileName; // name of the output file name
1200 const std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1201 const std::string fTreeName; // name of output tree
1202 const RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1203 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1204 const ColumnNames_t fOutputBranchNames;
1205 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1206 std::vector<BoolArrayMap> fBoolArrays; // Per-thread storage for C arrays of bools to be written out
1207 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1208 std::vector<std::vector<TBranch *>> fBranches;
1209 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1210 std::vector<std::vector<void *>> fBranchAddresses;
1211
1212public:
1213 using ColumnTypes_t = TypeList<BranchTypes...>;
1214 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1215 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1216 const RSnapshotOptions &options)
1217 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fIsFirstEvent(fNSlots, 1), fFileName(filename),
1218 fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1219 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots), fBoolArrays(fNSlots),
1220 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1221 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr))
1222 {
1223 }
1224 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1225 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1226
1227 void InitTask(TTreeReader *r, unsigned int slot)
1228 {
1229 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1230 if (!fOutputFiles[slot]) {
1231 // first time this thread executes something, let's create a TBufferMerger output directory
1232 fOutputFiles[slot] = fMerger->GetFile();
1233 }
1234 TDirectory *treeDirectory = fOutputFiles[slot].get();
1235 if (!fDirName.empty()) {
1236 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str());
1237 }
1238 // re-create output tree as we need to create its branches again, with new input variables
1239 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1240 fOutputTrees[slot] =
1241 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1242 if (fOptions.fAutoFlush)
1243 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1244 if (r) {
1245 // not an empty-source RDF
1246 fInputTrees[slot] = r->GetTree();
1247 // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1248 // addresses of the branch values. We need this in case of friend trees with different cluster granularity
1249 // than the main tree.
1250 // FIXME: AddClone might result in many many (safe) warnings printed by TTree::CopyAddresses, see ROOT-9487.
1251 const auto friendsListPtr = fInputTrees[slot]->GetListOfFriends();
1252 if (friendsListPtr && friendsListPtr->GetEntries() > 0)
1253 fInputTrees[slot]->AddClone(fOutputTrees[slot].get());
1254 }
1255 fIsFirstEvent[slot] = 1; // reset first event flag for this slot
1256 }
1257
1258 void FinalizeTask(unsigned int slot)
1259 {
1260 if (fOutputTrees[slot]->GetEntries() > 0)
1261 fOutputFiles[slot]->Write();
1262 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1263 fOutputTrees[slot].reset(nullptr);
1264 }
1265
1266 void Exec(unsigned int slot, BranchTypes &... values)
1267 {
1268 using ind_t = std::index_sequence_for<BranchTypes...>;
1269 if (!fIsFirstEvent[slot]) {
1270 UpdateCArraysPtrs(slot, values..., ind_t{});
1271 } else {
1272 SetBranches(slot, values..., ind_t{});
1273 fIsFirstEvent[slot] = 0;
1274 }
1275 UpdateBoolArrays(slot, values..., ind_t{});
1276 fOutputTrees[slot]->Fill();
1277 auto entries = fOutputTrees[slot]->GetEntries();
1278 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1279 if ((autoFlush > 0) && (entries % autoFlush == 0))
1280 fOutputFiles[slot]->Write();
1281 }
1282
1283 template <std::size_t... S>
1284 void UpdateCArraysPtrs(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1285 {
1286 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1287 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1288 // leaving associated to the branch of the output tree an invalid pointer.
1289 // With this code, we set the value of the pointer in the output branch anew when needed.
1290 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1291 // we need an int for the expander list.
1292 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1293 ? fBranches[slot][S]->SetAddress(GetData(values)),
1294 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1295 0};
1296 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1297 }
1298
1299 template <std::size_t... S>
1300 void SetBranches(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1301 {
1302 // hack to call TTree::Branch on all variadic template arguments
1303 int expander[] = {
1304 (SetBranchesHelper(fBoolArrays[slot], fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1305 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S], &values),
1306 0)...,
1307 0};
1308 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1309 (void)slot; // avoid unused variable warnings in gcc6.2
1310 }
1311
1312 template <std::size_t... S>
1313 void UpdateBoolArrays(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1314 {
1315 int expander[] = {
1316 (UpdateBoolArray(fBoolArrays[slot], values, fOutputBranchNames[S], *fOutputTrees[slot]), 0)..., 0};
1317 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1318 }
1319
1320 void Initialize()
1321 {
1322 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1323 fMerger = std::make_unique<ROOT::Experimental::TBufferMerger>(fFileName.c_str(), fOptions.fMode.c_str(), cs);
1324 }
1325
1326 void Finalize()
1327 {
1328 auto fileWritten = false;
1329 for (auto &file : fOutputFiles) {
1330 if (file) {
1331 file->Write();
1332 file->Close();
1333 fileWritten = true;
1334 }
1335 }
1336
1337 if (!fileWritten) {
1338 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1339 }
1340
1341 // flush all buffers to disk by destroying the TBufferMerger
1342 fOutputFiles.clear();
1343 fMerger.reset();
1344 }
1345
1346 std::string GetActionName() { return "Snapshot"; }
1347};
1348
1349template <typename Acc, typename Merge, typename R, typename T, typename U,
1350 bool MustCopyAssign = std::is_same<R, U>::value>
1351class AggregateHelper : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1352 Acc fAggregate;
1353 Merge fMerge;
1354 const std::shared_ptr<U> fResult;
1355 Results<U> fAggregators;
1356
1357public:
1358 using ColumnTypes_t = TypeList<T>;
1359 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1360 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1361 {
1362 }
1363 AggregateHelper(AggregateHelper &&) = default;
1364 AggregateHelper(const AggregateHelper &) = delete;
1365
1366 void InitTask(TTreeReader *, unsigned int) {}
1367
1369 void Exec(unsigned int slot, const T &value)
1370 {
1371 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1372 }
1373
1375 void Exec(unsigned int slot, const T &value)
1376 {
1377 fAggregate(fAggregators[slot], value);
1378 }
1379
1380 void Initialize() { /* noop */}
1381
1382 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1383 bool MergeAll = std::is_same<void, MergeRet>::value>
1384 typename std::enable_if<MergeAll, void>::type Finalize()
1385 {
1386 fMerge(fAggregators);
1387 *fResult = fAggregators[0];
1388 }
1389
1390 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1391 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1392 typename std::enable_if<MergeTwoByTwo, void>::type Finalize(...) // ... needed to let compiler distinguish overloads
1393 {
1394 for (const auto &acc : fAggregators)
1395 *fResult = fMerge(*fResult, acc);
1396 }
1397
1398 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1399
1400 std::string GetActionName() { return "Aggregate"; }
1401};
1402
1403} // end of NS RDF
1404} // end of NS Internal
1405} // end of NS ROOT
1406
1407/// \endcond
1408
1409#endif
SVector< double, 2 > v
Definition: Dict.h:5
Handle_t Display_t
Definition: GuiTypes.h:26
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define g(i)
Definition: RSha256.hxx:105
#define h(i)
Definition: RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
static const double x2[5]
static const double x1[5]
static const double x3[11]
unsigned long long ULong64_t
Definition: RtypesCore.h:70
void Warning(const char *location, const char *msgfmt,...)
int type
Definition: TGX11.cxx:120
Binding & operator=(OUT(*fun)(void))
typedef void((*Func_t)())
This class is the textual representation of the content of a columnar dataset.
Definition: RDisplay.hxx:63
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:221
iterator end() noexcept
Definition: RVec.hxx:332
const Impl_t & AsVector() const
Definition: RVec.hxx:297
iterator begin() noexcept
Definition: RVec.hxx:329
A TTree is a list of TBranches.
Definition: TBranch.h:64
Describe directory structure in memory.
Definition: TDirectory.h:34
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3975
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:614
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:32
A doubly linked list.
Definition: TList.h:44
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
A simple, robust and fast interface to read values from ROOT colmnar datasets such as TTree,...
Definition: TTreeReader.h:44
Double_t x[n]
Definition: legend1.C:17
#define F(x, y, z)
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition: RDFUtils.cxx:259
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:219
double T(double x)
Definition: ChebyshevPol.h:34
ROOT type_traits extensions.
Definition: TypeTraits.hxx:23
Namespace for new ROOT classes and functions.
Definition: StringConv.hxx:21
@ kSTLvector
Definition: ESTLType.h:30
ROOT::Detail::RDF::ColumnNames_t ColumnNames_t
Definition: RDataFrame.cxx:790
int CompressionSettings(RCompressionSetting::EAlgorithm algorithm, int compressionLevel)
RooArgSet S(const RooAbsArg &v1)
RooCmdArg Columns(Int_t ncol)
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Definition: file.py:1
STL namespace.
make_index_sequence< sizeof...(_Tp)> index_sequence_for
basic_string_view< char > string_view
Definition: RStringView.hxx:35
const char * Size
Definition: TXMLSetup.cxx:55
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
int fCompressionLevel
Compression level of output file.
Check for container traits.
Definition: TypeTraits.hxx:98
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:27
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
void ws()
Definition: ws.C:63