Logo ROOT  
Reference Guide
ActionHelpers.hxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
11#ifndef ROOT_RDFOPERATIONS
12#define ROOT_RDFOPERATIONS
13
14#include "Compression.h"
16#include "ROOT/RStringView.hxx"
17#include "ROOT/RVec.hxx"
18#include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
20#include "ROOT/RDF/Utils.hxx"
21#include "ROOT/RMakeUnique.hxx"
23#include "ROOT/TypeTraits.hxx"
24#include "ROOT/RDF/RDisplay.hxx"
25#include "RtypesCore.h"
26#include "TBranch.h"
27#include "TClassEdit.h"
28#include "TClassRef.h"
29#include "TDirectory.h"
30#include "TFile.h" // for SnapshotHelper
31#include "TH1.h"
32#include "TGraph.h"
33#include "TLeaf.h"
34#include "TObject.h"
35#include "TTree.h"
36#include "TTreeReader.h" // for SnapshotHelper
37
38#include <algorithm>
39#include <limits>
40#include <memory>
41#include <stdexcept>
42#include <string>
43#include <type_traits>
44#include <vector>
45#include <iomanip>
46
47/// \cond HIDDEN_SYMBOLS
48
49namespace ROOT {
50namespace Detail {
51namespace RDF {
/// CRTP base class of the action helpers.
/// It offers CallFinalizeTask, which forwards to Helper::FinalizeTask when the derived helper declares such a
/// member and silently does nothing otherwise.
template <typename Helper>
class RActionImpl {
public:
   /// Fallback used when Helper has no FinalizeTask member: a no-op.
   /// The (always empty) trailing parameter pack makes this overload lose against the one below whenever
   /// both of them are viable.
   template <typename... Ignored>
   void CallFinalizeTask(unsigned int /*slot*/, Ignored...)
   {
   }

   /// Forward to Helper::FinalizeTask(slot).
   /// The trailing return type only compiles if `&T::FinalizeTask` is well-formed, which removes this overload
   /// from consideration for helpers that do not declare FinalizeTask.
   template <typename T = Helper>
   auto CallFinalizeTask(unsigned int slot) -> decltype(&T::FinalizeTask, void())
   {
      Helper &self = static_cast<Helper &>(*this);
      self.FinalizeTask(slot);
   }
};
66
67} // namespace RDF
68} // namespace Detail
69
70namespace Internal {
71namespace RDF {
// Make the names of the type-traits, VecOps and RDF namespaces usable without qualification in the rest of
// ROOT::Internal::RDF.
72using namespace ROOT::TypeTraits;
73using namespace ROOT::VecOps;
74using namespace ROOT::RDF;
75using namespace ROOT::Detail::RDF;
76
// Histogram type used by the helpers below whenever no other type is specified (FillHelper's result type and
// FillParHelper's default template argument).
77using Hist_t = ::TH1D;
78
/// Container in which an action helper keeps the partial result of each thread.
// std::vector<bool> must never be instantiated here: it would make it impossible to return a reference to one
// of the thread-local results, hence std::deque is used for bool. Sharing one definition of the container type
// among all helpers also makes it easy to swap the underlying container, e.g. if false sharing of the
// thread-local results turns out to be a problem.
template <typename T>
using Results = typename std::conditional<!std::is_same<T, bool>::value, std::vector<T>, std::deque<T>>::type;
85
86template <typename F>
87class ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
89
90public:
91 using ColumnTypes_t = RemoveFirstParameter_t<typename CallableTraits<F>::arg_types>;
92 ForeachSlotHelper(F &&f) : fCallable(f) {}
93 ForeachSlotHelper(ForeachSlotHelper &&) = default;
94 ForeachSlotHelper(const ForeachSlotHelper &) = delete;
95
96 void InitTask(TTreeReader *, unsigned int) {}
97
98 template <typename... Args>
99 void Exec(unsigned int slot, Args &&... args)
100 {
101 // check that the decayed types of Args are the same as the branch types
102 static_assert(std::is_same<TypeList<typename std::decay<Args>::type...>, ColumnTypes_t>::value, "");
103 fCallable(slot, std::forward<Args>(args)...);
104 }
105
106 void Initialize() { /* noop */}
107
108 void Finalize() { /* noop */}
109
110 std::string GetActionName() { return "ForeachSlot"; }
111};
112
113class CountHelper : public RActionImpl<CountHelper> {
114 const std::shared_ptr<ULong64_t> fResultCount;
115 Results<ULong64_t> fCounts;
116
117public:
118 using ColumnTypes_t = TypeList<>;
119 CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
120 CountHelper(CountHelper &&) = default;
121 CountHelper(const CountHelper &) = delete;
122 void InitTask(TTreeReader *, unsigned int) {}
123 void Exec(unsigned int slot);
124 void Initialize() { /* noop */}
125 void Finalize();
126 ULong64_t &PartialUpdate(unsigned int slot);
127
128 std::string GetActionName() { return "Count"; }
129};
130
131template <typename ProxiedVal_t>
132class ReportHelper : public RActionImpl<ReportHelper<ProxiedVal_t>> {
133 const std::shared_ptr<RCutFlowReport> fReport;
134 // Here we have a weak pointer since we need to keep track of the validity
135 // of the proxied node. It can happen that the user does not trigger the
136 // event loop by looking into the RResultPtr and the chain goes out of scope
137 // before the Finalize method is invoked.
138 std::weak_ptr<ProxiedVal_t> fProxiedWPtr;
139 bool fReturnEmptyReport;
140
141public:
142 using ColumnTypes_t = TypeList<>;
143 ReportHelper(const std::shared_ptr<RCutFlowReport> &report, const std::shared_ptr<ProxiedVal_t> &pp, bool emptyRep)
144 : fReport(report), fProxiedWPtr(pp), fReturnEmptyReport(emptyRep){};
145 ReportHelper(ReportHelper &&) = default;
146 ReportHelper(const ReportHelper &) = delete;
147 void InitTask(TTreeReader *, unsigned int) {}
148 void Exec(unsigned int /* slot */) {}
149 void Initialize() { /* noop */}
150 void Finalize()
151 {
152 // We need the weak_ptr in order to avoid crashes at tear down
153 if (!fReturnEmptyReport && !fProxiedWPtr.expired())
154 fProxiedWPtr.lock()->Report(*fReport);
155 }
156
157 std::string GetActionName() { return "Report"; }
158};
159
160class FillHelper : public RActionImpl<FillHelper> {
161 // this sets a total initial size of 16 MB for the buffers (can increase)
162 static constexpr unsigned int fgTotalBufSize = 2097152;
163 using BufEl_t = double;
164 using Buf_t = std::vector<BufEl_t>;
165
166 std::vector<Buf_t> fBuffers;
167 std::vector<Buf_t> fWBuffers;
168 const std::shared_ptr<Hist_t> fResultHist;
169 unsigned int fNSlots;
170 unsigned int fBufSize;
171 /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
172 Results<std::unique_ptr<Hist_t>> fPartialHists;
173 Buf_t fMin;
174 Buf_t fMax;
175
176 void UpdateMinMax(unsigned int slot, double v);
177
178public:
179 FillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
180 FillHelper(FillHelper &&) = default;
181 FillHelper(const FillHelper &) = delete;
182 void InitTask(TTreeReader *, unsigned int) {}
183 void Exec(unsigned int slot, double v);
184 void Exec(unsigned int slot, double v, double w);
185
186 template <typename T, typename std::enable_if<IsDataContainer<T>::value || std::is_same<T, std::string>::value, int>::type = 0>
187 void Exec(unsigned int slot, const T &vs)
188 {
189 auto &thisBuf = fBuffers[slot];
190 for (auto &v : vs) {
191 UpdateMinMax(slot, v);
192 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
193 }
194 }
195
196 template <typename T, typename W,
197 typename std::enable_if<IsDataContainer<T>::value && IsDataContainer<W>::value, int>::type = 0>
198 void Exec(unsigned int slot, const T &vs, const W &ws)
199 {
200 auto &thisBuf = fBuffers[slot];
201
202 for (auto &v : vs) {
203 UpdateMinMax(slot, v);
204 thisBuf.emplace_back(v);
205 }
206
207 auto &thisWBuf = fWBuffers[slot];
208 for (auto &w : ws) {
209 thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
210 }
211 }
212
213 template <typename T, typename W,
214 typename std::enable_if<IsDataContainer<T>::value && !IsDataContainer<W>::value, int>::type = 0>
215 void Exec(unsigned int slot, const T &vs, const W w)
216 {
217 auto &thisBuf = fBuffers[slot];
218 for (auto &v : vs) {
219 UpdateMinMax(slot, v);
220 thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
221 }
222
223 auto &thisWBuf = fWBuffers[slot];
224 thisWBuf.insert(thisWBuf.end(), vs.size(), w);
225 }
226
227 // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
228 template <typename T, typename W,
229 typename std::enable_if<IsDataContainer<W>::value && !IsDataContainer<T>::value, int>::type = 0>
230 void Exec(unsigned int, const T &, const W &)
231 {
232 throw std::runtime_error(
233 "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
234 }
235
236 Hist_t &PartialUpdate(unsigned int);
237
238 void Initialize() { /* noop */}
239
240 void Finalize();
241
242 std::string GetActionName() { return "Fill"; }
243};
244
// Explicit instantiation declarations of the templated FillHelper::Exec overloads for std::vector of float,
// double, char, int and unsigned int: first the single-collection overload, then the overload taking a
// collection of values and a collection of weights of the same type. `extern template` suppresses implicit
// instantiation in the translation units including this header, so the matching explicit instantiation
// definitions have to be provided elsewhere.
245extern template void FillHelper::Exec(unsigned int, const std::vector<float> &);
246extern template void FillHelper::Exec(unsigned int, const std::vector<double> &);
247extern template void FillHelper::Exec(unsigned int, const std::vector<char> &);
248extern template void FillHelper::Exec(unsigned int, const std::vector<int> &);
249extern template void FillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
250extern template void FillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
251extern template void FillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
252extern template void FillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
253extern template void FillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
254extern template void
255FillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
256
257template <typename HIST = Hist_t>
258class FillParHelper : public RActionImpl<FillParHelper<HIST>> {
259 std::vector<HIST *> fObjects;
260
261public:
262 FillParHelper(FillParHelper &&) = default;
263 FillParHelper(const FillParHelper &) = delete;
264
265 FillParHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
266 {
267 fObjects[0] = h.get();
268 // Initialise all other slots
269 for (unsigned int i = 1; i < nSlots; ++i) {
270 fObjects[i] = new HIST(*fObjects[0]);
271 if (auto objAsHist = dynamic_cast<TH1*>(fObjects[i])) {
272 objAsHist->SetDirectory(nullptr);
273 }
274 }
275 }
276
277 void InitTask(TTreeReader *, unsigned int) {}
278
279 void Exec(unsigned int slot, double x0) // 1D histos
280 {
281 fObjects[slot]->Fill(x0);
282 }
283
284 void Exec(unsigned int slot, double x0, double x1) // 1D weighted and 2D histos
285 {
286 fObjects[slot]->Fill(x0, x1);
287 }
288
289 void Exec(unsigned int slot, double x0, double x1, double x2) // 2D weighted and 3D histos
290 {
291 fObjects[slot]->Fill(x0, x1, x2);
292 }
293
294 void Exec(unsigned int slot, double x0, double x1, double x2, double x3) // 3D weighted histos
295 {
296 fObjects[slot]->Fill(x0, x1, x2, x3);
297 }
298
299 template <typename X0, typename std::enable_if<IsDataContainer<X0>::value || std::is_same<X0, std::string>::value, int>::type = 0>
300 void Exec(unsigned int slot, const X0 &x0s)
301 {
302 auto thisSlotH = fObjects[slot];
303 for (auto &x0 : x0s) {
304 thisSlotH->Fill(x0); // TODO: Can be optimised in case T == vector<double>
305 }
306 }
307
308 // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
309 template <typename X0, typename X1,
310 typename std::enable_if<IsDataContainer<X1>::value && !IsDataContainer<X0>::value, int>::type = 0>
311 void Exec(unsigned int , const X0 &, const X1 &)
312 {
313 throw std::runtime_error(
314 "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
315 }
316
317 template <typename X0, typename X1,
318 typename std::enable_if<IsDataContainer<X0>::value && IsDataContainer<X1>::value, int>::type = 0>
319 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
320 {
321 auto thisSlotH = fObjects[slot];
322 if (x0s.size() != x1s.size()) {
323 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
324 }
325 auto x0sIt = std::begin(x0s);
326 const auto x0sEnd = std::end(x0s);
327 auto x1sIt = std::begin(x1s);
328 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
329 thisSlotH->Fill(*x0sIt, *x1sIt); // TODO: Can be optimised in case T == vector<double>
330 }
331 }
332
333 template <typename X0, typename W,
334 typename std::enable_if<IsDataContainer<X0>::value && !IsDataContainer<W>::value, int>::type = 0>
335 void Exec(unsigned int slot, const X0 &x0s, const W w)
336 {
337 auto thisSlotH = fObjects[slot];
338 for (auto &&x : x0s) {
339 thisSlotH->Fill(x, w);
340 }
341 }
342
343 template <typename X0, typename X1, typename X2,
344 typename std::enable_if<IsDataContainer<X0>::value && IsDataContainer<X1>::value && IsDataContainer<X2>::value,
345 int>::type = 0>
346 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s)
347 {
348 auto thisSlotH = fObjects[slot];
349 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
350 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
351 }
352 auto x0sIt = std::begin(x0s);
353 const auto x0sEnd = std::end(x0s);
354 auto x1sIt = std::begin(x1s);
355 auto x2sIt = std::begin(x2s);
356 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
357 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt); // TODO: Can be optimised in case T == vector<double>
358 }
359 }
360
361 template <typename X0, typename X1, typename W,
362 typename std::enable_if<IsDataContainer<X0>::value && IsDataContainer<X1>::value && !IsDataContainer<W>::value,
363 int>::type = 0>
364 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const W w)
365 {
366 auto thisSlotH = fObjects[slot];
367 if (x0s.size() != x1s.size()) {
368 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
369 }
370 auto x0sIt = std::begin(x0s);
371 const auto x0sEnd = std::end(x0s);
372 auto x1sIt = std::begin(x1s);
373 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
374 thisSlotH->Fill(*x0sIt, *x1sIt, w); // TODO: Can be optimised in case T == vector<double>
375 }
376 }
377
378 template <typename X0, typename X1, typename X2, typename X3,
379 typename std::enable_if<IsDataContainer<X0>::value && IsDataContainer<X1>::value && IsDataContainer<X2>::value &&
380 IsDataContainer<X3>::value,
381 int>::type = 0>
382 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const X3 &x3s)
383 {
384 auto thisSlotH = fObjects[slot];
385 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size() && x1s.size() == x3s.size())) {
386 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
387 }
388 auto x0sIt = std::begin(x0s);
389 const auto x0sEnd = std::end(x0s);
390 auto x1sIt = std::begin(x1s);
391 auto x2sIt = std::begin(x2s);
392 auto x3sIt = std::begin(x3s);
393 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++, x3sIt++) {
394 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, *x3sIt); // TODO: Can be optimised in case T == vector<double>
395 }
396 }
397
398 template <typename X0, typename X1, typename X2, typename W,
399 typename std::enable_if<IsDataContainer<X0>::value && IsDataContainer<X1>::value && IsDataContainer<X2>::value &&
400 !IsDataContainer<W>::value,
401 int>::type = 0>
402 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const W w)
403 {
404 auto thisSlotH = fObjects[slot];
405 if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
406 throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
407 }
408 auto x0sIt = std::begin(x0s);
409 const auto x0sEnd = std::end(x0s);
410 auto x1sIt = std::begin(x1s);
411 auto x2sIt = std::begin(x2s);
412 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
413 thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, w);
414 }
415 }
416
417 void Initialize() { /* noop */}
418
419 void Finalize()
420 {
421 auto resObj = fObjects[0];
422 const auto nSlots = fObjects.size();
423 TList l;
424 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
425 for (unsigned int slot = 1; slot < nSlots; ++slot) {
426 l.Add(fObjects[slot]);
427 }
428
429 resObj->Merge(&l);
430 }
431
432 HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
433
434 std::string GetActionName() { return "FillPar"; }
435};
436
437class FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
438public:
439 using Result_t = ::TGraph;
440
441private:
442 std::vector<::TGraph *> fGraphs;
443
444public:
445 FillTGraphHelper(FillTGraphHelper &&) = default;
446 FillTGraphHelper(const FillTGraphHelper &) = delete;
447
448 // The last parameter is always false, as at the moment there is no way to propagate the parameter from the user to
449 // this method
450 FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
451 {
452 fGraphs[0] = g.get();
453 // Initialise all other slots
454 for (unsigned int i = 1; i < nSlots; ++i) {
455 fGraphs[i] = new TGraph(*fGraphs[0]);
456 }
457 }
458
459 void Initialize() {}
460 void InitTask(TTreeReader *, unsigned int) {}
461
462 template <typename X0, typename X1,
463 typename std::enable_if<IsDataContainer<X0>::value && IsDataContainer<X1>::value, int>::type = 0>
464 void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
465 {
466 if (x0s.size() != x1s.size()) {
467 throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
468 }
469 auto thisSlotG = fGraphs[slot];
470 auto x0sIt = std::begin(x0s);
471 const auto x0sEnd = std::end(x0s);
472 auto x1sIt = std::begin(x1s);
473 for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
474 thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
475 }
476 }
477
478 template <typename X0, typename X1>
479 void Exec(unsigned int slot, X0 x0, X1 x1)
480 {
481 auto thisSlotG = fGraphs[slot];
482 thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
483 }
484
485 void Finalize()
486 {
487 const auto nSlots = fGraphs.size();
488 auto resGraph = fGraphs[0];
489 TList l;
490 l.SetOwner(); // The list will free the memory associated to its elements upon destruction
491 for (unsigned int slot = 1; slot < nSlots; ++slot) {
492 l.Add(fGraphs[slot]);
493 }
494 resGraph->Merge(&l);
495 }
496
497 std::string GetActionName() { return "Graph"; }
498
499 Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
500};
501
502// In case of the take helper we have 4 cases:
503// 1. The column is not an RVec, the collection is not a vector
504// 2. The column is not an RVec, the collection is a vector
505// 3. The column is an RVec, the collection is not a vector
506// 4. The column is an RVec, the collection is a vector
507
/// Append `value` at the end of `coll` by means of emplace_back.
template <typename V, typename COLL>
void FillColl(V &&value, COLL &coll)
{
   coll.emplace_back(value);
}

/// Overload for bool values.
// push_back is used here because some compilers do not support emplace_back for bool.
template <typename COLL>
void FillColl(bool value, COLL &coll)
{
   coll.push_back(value);
}
518
519// Case 1.: The column is not an RVec, the collection is not a vector
520// No optimisations, no transformations: just copies.
521template <typename RealT_t, typename T, typename COLL>
522class TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
523 Results<std::shared_ptr<COLL>> fColls;
524
525public:
526 using ColumnTypes_t = TypeList<T>;
527 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
528 {
529 fColls.emplace_back(resultColl);
530 for (unsigned int i = 1; i < nSlots; ++i)
531 fColls.emplace_back(std::make_shared<COLL>());
532 }
533 TakeHelper(TakeHelper &&);
534 TakeHelper(const TakeHelper &) = delete;
535
536 void InitTask(TTreeReader *, unsigned int) {}
537
538 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
539
540 void Initialize() { /* noop */}
541
542 void Finalize()
543 {
544 auto rColl = fColls[0];
545 for (unsigned int i = 1; i < fColls.size(); ++i) {
546 const auto &coll = fColls[i];
547 const auto end = coll->end();
548 // Use an explicit loop here to prevent compiler warnings introduced by
549 // clang's range-based loop analysis and vector<bool> references.
550 for (auto j = coll->begin(); j != end; j++) {
551 FillColl(*j, *rColl);
552 }
553 }
554 }
555
556 COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
557
558 std::string GetActionName() { return "Take"; }
559};
560
561// Case 2.: The column is not an RVec, the collection is a vector
562// Optimisations, no transformations: just copies.
563template <typename RealT_t, typename T>
564class TakeHelper<RealT_t, T, std::vector<T>> : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
565 Results<std::shared_ptr<std::vector<T>>> fColls;
566
567public:
568 using ColumnTypes_t = TypeList<T>;
569 TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
570 {
571 fColls.emplace_back(resultColl);
572 for (unsigned int i = 1; i < nSlots; ++i) {
573 auto v = std::make_shared<std::vector<T>>();
574 v->reserve(1024);
575 fColls.emplace_back(v);
576 }
577 }
578 TakeHelper(TakeHelper &&);
579 TakeHelper(const TakeHelper &) = delete;
580
581 void InitTask(TTreeReader *, unsigned int) {}
582
583 void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
584
585 void Initialize() { /* noop */}
586
587 // This is optimised to treat vectors
588 void Finalize()
589 {
590 ULong64_t totSize = 0;
591 for (auto &coll : fColls)
592 totSize += coll->size();
593 auto rColl = fColls[0];
594 rColl->reserve(totSize);
595 for (unsigned int i = 1; i < fColls.size(); ++i) {
596 auto &coll = fColls[i];
597 rColl->insert(rColl->end(), coll->begin(), coll->end());
598 }
599 }
600
601 std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
602
603 std::string GetActionName() { return "Take"; }
604};
605
606// Case 3.: The column is a RVec, the collection is not a vector
607// No optimisations, transformations from RVecs to vectors
608template <typename RealT_t, typename COLL>
609class TakeHelper<RealT_t, RVec<RealT_t>, COLL> : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
610 Results<std::shared_ptr<COLL>> fColls;
611
612public:
613 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
614 TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
615 {
616 fColls.emplace_back(resultColl);
617 for (unsigned int i = 1; i < nSlots; ++i)
618 fColls.emplace_back(std::make_shared<COLL>());
619 }
620 TakeHelper(TakeHelper &&);
621 TakeHelper(const TakeHelper &) = delete;
622
623 void InitTask(TTreeReader *, unsigned int) {}
624
625 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
626
627 void Initialize() { /* noop */}
628
629 void Finalize()
630 {
631 auto rColl = fColls[0];
632 for (unsigned int i = 1; i < fColls.size(); ++i) {
633 auto &coll = fColls[i];
634 for (auto &v : *coll) {
635 rColl->emplace_back(v);
636 }
637 }
638 }
639
640 std::string GetActionName() { return "Take"; }
641};
642
643// Case 4.: The column is an RVec, the collection is a vector
644// Optimisations, transformations from RVecs to vectors
645template <typename RealT_t>
646class TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
647 : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
648
649 Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
650
651public:
652 using ColumnTypes_t = TypeList<RVec<RealT_t>>;
653 TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
654 {
655 fColls.emplace_back(resultColl);
656 for (unsigned int i = 1; i < nSlots; ++i) {
657 auto v = std::make_shared<std::vector<RealT_t>>();
658 v->reserve(1024);
659 fColls.emplace_back(v);
660 }
661 }
662 TakeHelper(TakeHelper &&);
663 TakeHelper(const TakeHelper &) = delete;
664
665 void InitTask(TTreeReader *, unsigned int) {}
666
667 void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
668
669 void Initialize() { /* noop */}
670
671 // This is optimised to treat vectors
672 void Finalize()
673 {
674 ULong64_t totSize = 0;
675 for (auto &coll : fColls)
676 totSize += coll->size();
677 auto rColl = fColls[0];
678 rColl->reserve(totSize);
679 for (unsigned int i = 1; i < fColls.size(); ++i) {
680 auto &coll = fColls[i];
681 rColl->insert(rColl->end(), coll->begin(), coll->end());
682 }
683 }
684
685 std::string GetActionName() { return "Take"; }
686};
687
// Out-of-line defaulted move constructors: the first definition belongs to the primary TakeHelper template,
// the following three to its partial specialisations (T with std::vector<T>, RVec<RealT_t> with a generic
// COLL, RVec<RealT_t> with std::vector<RealT_t>). Each of them matches the `TakeHelper(TakeHelper &&);`
// declaration found in the corresponding class body.
688// Extern templates for TakeHelper
689// NOTE: The move-constructor of specializations declared as extern templates
690// must be defined out of line, otherwise cling fails to find its symbol.
691template <typename RealT_t, typename T, typename COLL>
692TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
693template <typename RealT_t, typename T>
694TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
695template <typename RealT_t, typename COLL>
696TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
697template <typename RealT_t>
698TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
699
// Explicit instantiation declarations of TakeHelper for bool and for the common integer and floating-point
// column types, each collected into a std::vector of the same type. `extern template` suppresses implicit
// instantiation in the translation units including this header, so the matching explicit instantiation
// definitions have to be provided elsewhere.
// The guard evaluates to false for every compiler that does not define __GNUC__ or defines it as 5 or less:
// with such compilers TakeHelper is instantiated implicitly as usual.
700// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
701#if __GNUC__ > 5
702extern template class TakeHelper<bool, bool, std::vector<bool>>;
703extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
704extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
705extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
706extern template class TakeHelper<int, int, std::vector<int>>;
707extern template class TakeHelper<long, long, std::vector<long>>;
708extern template class TakeHelper<long long, long long, std::vector<long long>>;
709extern template class TakeHelper<float, float, std::vector<float>>;
710extern template class TakeHelper<double, double, std::vector<double>>;
711#endif
712
713
714template <typename ResultType>
715class MinHelper : public RActionImpl<MinHelper<ResultType>> {
716 const std::shared_ptr<ResultType> fResultMin;
717 Results<ResultType> fMins;
718
719public:
720 MinHelper(MinHelper &&) = default;
721 MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
722 : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
723 {
724 }
725
726 void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
727
728 void InitTask(TTreeReader *, unsigned int) {}
729
730 template <typename T, typename std::enable_if<IsDataContainer<T>::value, int>::type = 0>
731 void Exec(unsigned int slot, const T &vs)
732 {
733 for (auto &&v : vs)
734 fMins[slot] = std::min(static_cast<ResultType>(v), fMins[slot]);
735 }
736
737 void Initialize() { /* noop */}
738
739 void Finalize()
740 {
741 *fResultMin = std::numeric_limits<ResultType>::max();
742 for (auto &m : fMins)
743 *fResultMin = std::min(m, *fResultMin);
744 }
745
746 ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
747
748 std::string GetActionName() { return "Min"; }
749};
750
751// TODO
752// extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
753// extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
754// extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
755// extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
756// extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
757
758template <typename ResultType>
759class MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
760 const std::shared_ptr<ResultType> fResultMax;
761 Results<ResultType> fMaxs;
762
763public:
764 MaxHelper(MaxHelper &&) = default;
765 MaxHelper(const MaxHelper &) = delete;
766 MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
767 : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
768 {
769 }
770
771 void InitTask(TTreeReader *, unsigned int) {}
772 void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
773
774 template <typename T, typename std::enable_if<IsDataContainer<T>::value, int>::type = 0>
775 void Exec(unsigned int slot, const T &vs)
776 {
777 for (auto &&v : vs)
778 fMaxs[slot] = std::max(static_cast<ResultType>(v), fMaxs[slot]);
779 }
780
781 void Initialize() { /* noop */}
782
783 void Finalize()
784 {
785 *fResultMax = std::numeric_limits<ResultType>::lowest();
786 for (auto &m : fMaxs) {
787 *fResultMax = std::max(m, *fResultMax);
788 }
789 }
790
791 ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
792
793 std::string GetActionName() { return "Max"; }
794};
795
796// TODO
797// extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
798// extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
799// extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
800// extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
801// extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
802
803template <typename ResultType>
804class SumHelper : public RActionImpl<SumHelper<ResultType>> {
805 const std::shared_ptr<ResultType> fResultSum;
806 Results<ResultType> fSums;
807
808 /// Evaluate neutral element for this type and the sum operation.
809 /// This is assumed to be any_value - any_value if operator- is defined
810 /// for the type, otherwise a default-constructed ResultType{} is used.
811 template <typename T = ResultType>
812 auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
813 {
814 return v - v;
815 }
816
817 template <typename T = ResultType, typename Dummy = int>
818 ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
819 {
820 return ResultType{};
821 }
822
823public:
824 SumHelper(SumHelper &&) = default;
825 SumHelper(const SumHelper &) = delete;
826 SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
827 : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1))
828 {
829 }
830
831 void InitTask(TTreeReader *, unsigned int) {}
832 void Exec(unsigned int slot, ResultType v) { fSums[slot] += v; }
833
834 template <typename T, typename std::enable_if<IsDataContainer<T>::value, int>::type = 0>
835 void Exec(unsigned int slot, const T &vs)
836 {
837 for (auto &&v : vs)
838 fSums[slot] += static_cast<ResultType>(v);
839 }
840
841 void Initialize() { /* noop */}
842
843 void Finalize()
844 {
845 for (auto &m : fSums)
846 *fResultSum += m;
847 }
848
849 ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
850
851 std::string GetActionName() { return "Sum"; }
852};
853
854class MeanHelper : public RActionImpl<MeanHelper> {
855 const std::shared_ptr<double> fResultMean;
856 std::vector<ULong64_t> fCounts;
857 std::vector<double> fSums;
858 std::vector<double> fPartialMeans;
859
860public:
861 MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
862 MeanHelper(MeanHelper &&) = default;
863 MeanHelper(const MeanHelper &) = delete;
864 void InitTask(TTreeReader *, unsigned int) {}
865 void Exec(unsigned int slot, double v);
866
867 template <typename T, typename std::enable_if<IsDataContainer<T>::value, int>::type = 0>
868 void Exec(unsigned int slot, const T &vs)
869 {
870 for (auto &&v : vs) {
871 fSums[slot] += v;
872 fCounts[slot]++;
873 }
874 }
875
876 void Initialize() { /* noop */}
877
878 void Finalize();
879
880 double &PartialUpdate(unsigned int slot);
881
882 std::string GetActionName() { return "Mean"; }
883};
884
// Explicit instantiation declarations of the templated MeanHelper::Exec for std::vector of float, double,
// char, int and unsigned int. `extern template` suppresses implicit instantiation in the translation units
// including this header, so the matching explicit instantiation definitions have to be provided elsewhere.
885extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
886extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
887extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
888extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
889extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
890
891class StdDevHelper : public RActionImpl<StdDevHelper> {
892 // Number of subsets of data
893 const unsigned int fNSlots;
894 const std::shared_ptr<double> fResultStdDev;
895 // Number of element for each slot
896 std::vector<ULong64_t> fCounts;
897 // Mean of each slot
898 std::vector<double> fMeans;
899 // Squared distance from the mean
900 std::vector<double> fDistancesfromMean;
901
902public:
903 StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
904 StdDevHelper(StdDevHelper &&) = default;
905 StdDevHelper(const StdDevHelper &) = delete;
906 void InitTask(TTreeReader *, unsigned int) {}
907 void Exec(unsigned int slot, double v);
908
909 template <typename T, typename std::enable_if<IsDataContainer<T>::value, int>::type = 0>
910 void Exec(unsigned int slot, const T &vs)
911 {
912 for (auto &&v : vs) {
913 Exec(slot, v);
914 }
915 }
916
917 void Initialize() { /* noop */}
918
919 void Finalize();
920
921 std::string GetActionName() { return "StdDev"; }
922};
923
924extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
925extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
926extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
927extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
928extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
929
930template <typename PrevNodeType>
931class DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
932private:
934 const std::shared_ptr<Display_t> fDisplayerHelper;
935 const std::shared_ptr<PrevNodeType> fPrevNode;
936
937public:
938 DisplayHelper(const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
939 : fDisplayerHelper(d), fPrevNode(prevNode)
940 {
941 }
942 DisplayHelper(DisplayHelper &&) = default;
943 DisplayHelper(const DisplayHelper &) = delete;
944 void InitTask(TTreeReader *, unsigned int) {}
945
946 template <typename... Columns>
947 void Exec(unsigned int, Columns... columns)
948 {
949 fDisplayerHelper->AddRow(columns...);
950 if (!fDisplayerHelper->HasNext()) {
951 fPrevNode->StopProcessing();
952 }
953 }
954
955 void Initialize() {}
956
957 void Finalize() {}
958
959 std::string GetActionName() { return "Display"; }
960};
961
962// std::vector<bool> is special, and not in a good way. As a consequence Snapshot of RVec<bool> needs to be treated
963// specially. In particular, if RVec<bool> is filled with a (fixed or variable size) boolean array coming from
964// a ROOT file, when writing out the correspinding branch from a Snapshot we do not have an address to set for the
965// TTree branch (std::vector<bool> and, consequently, RVec<bool> do not provide a `data()` method).
966// Bools is a lightweight wrapper around a C array of booleans that is meant to provide a stable address for the
967// output TTree to read the contents of the snapshotted branches at Fill time.
968class BoolArray {
969 std::size_t fSize = 0;
970 bool *fBools = nullptr;
971
972 bool *CopyVector(const RVec<bool> &v)
973 {
974 auto b = new bool[fSize];
975 std::copy(v.begin(), v.end(), b);
976 return b;
977 }
978
979 bool *CopyArray(bool *o, std::size_t size)
980 {
981 auto b = new bool[size];
982 for (auto i = 0u; i < size; ++i)
983 b[i] = o[i];
984 return b;
985 }
986
987public:
988 // this generic constructor could be replaced with a constexpr if in SetBranchesHelper
989 BoolArray() = default;
990 template <typename T>
991 BoolArray(const T &) { throw std::runtime_error("This constructor should never be called"); }
992 BoolArray(const RVec<bool> &v) : fSize(v.size()), fBools(CopyVector(v)) {}
993 BoolArray(const BoolArray &b)
994 {
995 CopyArray(b.fBools, b.fSize);
996 }
997 BoolArray &operator=(const BoolArray &b)
998 {
999 delete[] fBools;
1000 CopyArray(b.fBools, b.fSize);
1001 return *this;
1002 }
1003 BoolArray(BoolArray &&b)
1004 {
1005 fSize = b.fSize;
1006 fBools = b.fBools;
1007 b.fSize = 0;
1008 b.fBools = nullptr;
1009 }
1010 BoolArray &operator=(BoolArray &&b)
1011 {
1012 delete[] fBools;
1013 fSize = b.fSize;
1014 fBools = b.fBools;
1015 b.fSize = 0;
1016 b.fBools = nullptr;
1017 return *this;
1018 }
1019 ~BoolArray() { delete[] fBools; }
1020 std::size_t Size() const { return fSize; }
1021 bool *Data() { return fBools; }
1022};
1023using BoolArrayMap = std::map<std::string, BoolArray>;
1024
1025inline bool *UpdateBoolArrayIfBool(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName)
1026{
1027 // create a boolArrays entry
1028 boolArrays[outName] = BoolArray(v);
1029 return boolArrays[outName].Data();
1030}
1031
1032template <typename T>
1033T *UpdateBoolArrayIfBool(BoolArrayMap &, RVec<T> &v, const std::string &)
1034{
1035 return v.data();
1036}
1037
1038// Helper which gets the return value of the data() method if the type is an
1039// RVec (of anything but a bool), nullptr otherwise.
1040inline void *GetData(ROOT::VecOps::RVec<bool> & /*v*/)
1041{
1042 return nullptr;
1043}
1044
1045template <typename T>
1046void *GetData(ROOT::VecOps::RVec<T> &v)
1047{
1048 return v.data();
1049}
1050
1051template <typename T>
1052void *GetData(T & /*v*/)
1053{
1054 return nullptr;
1055}
1056
1057template <typename T>
1058void SetBranchesHelper(BoolArrayMap &, TTree *inputTree, TTree &outputTree, const std::string &inName,
1059 const std::string &name, TBranch *&branch, void *&branchAddress, T *address)
1060{
1061 auto *inputBranch = inputTree ? inputTree->GetBranch(inName.c_str()) : nullptr;
1062 if (inputBranch) {
1063 // Respect the original bufsize and splitlevel arguments
1064 // In particular, by keeping splitlevel equal to 0 if this was the case for `inputBranch`, we avoid
1065 // writing garbage when unsplit objects cannot be written as split objects (e.g. in case of a polymorphic
1066 // TObject branch, see https://bit.ly/2EjLMId ).
1067 const auto bufSize = inputBranch->GetBasketSize();
1068 const auto splitLevel = inputBranch->GetSplitLevel();
1069
1070 static TClassRef tbo_cl("TBranchObject");
1071 if (inputBranch->IsA() == tbo_cl) {
1072 // Need to pass a pointer to pointer
1073 outputTree.Branch(name.c_str(), (T **)inputBranch->GetAddress(), bufSize, splitLevel);
1074 } else {
1075 outputTree.Branch(name.c_str(), address, bufSize, splitLevel);
1076 }
1077 } else {
1078 outputTree.Branch(name.c_str(), address);
1079 }
1080 // This is not an array branch, so we don't need to register the address of the input branch.
1081 branch = nullptr;
1082 branchAddress = nullptr;
1083}
1084
1085/// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1086/// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1087/// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch title
1088/// 2. RVecs coming from a custom column or a source
1089/// 3. vectors coming from ROOT files
1090/// 4. TClonesArray
1091///
1092/// In case of 1., we keep aside the pointer to the branch and the pointer to the input value (in `branch` and
1093/// `branchAddress`) so we can intercept changes in the address of the input branch and tell the output branch.
1094template <typename T>
1095void SetBranchesHelper(BoolArrayMap &boolArrays, TTree *inputTree, TTree &outputTree, const std::string &inName,
1096 const std::string &outName, TBranch *&branch, void *&branchAddress, RVec<T> *ab)
1097{
1098 auto *const inputBranch = inputTree ? inputTree->GetBranch(inName.c_str()) : nullptr;
1099 const bool isTClonesArray = inputBranch != nullptr && std::string(inputBranch->GetClassName()) == "TClonesArray";
1100 const auto mustWriteStdVec = !inputBranch || isTClonesArray ||
1101 ROOT::ESTLType::kSTLvector == TClassEdit::IsSTLCont(inputBranch->GetClassName());
1102
1103 if (mustWriteStdVec) {
1104 // Treat:
1105 // 2. RVec coming from a custom column or a source
1106 // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1107 // 4. TClonesArray.
1108 // In all cases, we write out a std::vector<T> when the column is RVec<T>
1109 if (isTClonesArray) {
1110 Warning("Snapshot",
1111 "Branch \"%s\" contains TClonesArrays but the type specified to Snapshot was RVec<T>. The branch will "
1112 "be written out as a std::vector instead of a TClonesArray. Specify that the type of the branch is "
1113 "TClonesArray as a Snapshot template parameter to write out a TClonesArray instead.", inName.c_str());
1114 }
1115 outputTree.Branch(outName.c_str(), &ab->AsVector());
1116 return;
1117 }
1118
1119 // Treat 1, the C-array case
1120 auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1121 const auto bname = leaf->GetName();
1122 const auto counterStr =
1123 leaf->GetLeafCount() ? std::string(leaf->GetLeafCount()->GetName()) : std::to_string(leaf->GetLenStatic());
1124 const auto btype = leaf->GetTypeName();
1125 const auto rootbtype = TypeName2ROOTTypeName(btype);
1126 const auto leaflist = std::string(bname) + "[" + counterStr + "]/" + rootbtype;
1127
1128 /// RVec<bool> is special because std::vector<bool> is special. In particular, it has no `data()`,
1129 /// so we need to explicitly manage storage of the data that the tree needs to Fill branches with.
1130 auto dataPtr = UpdateBoolArrayIfBool(boolArrays, *ab, outName);
1131
1132 auto *const outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1133 outputBranch->SetTitle(inputBranch->GetTitle());
1134
1135 // Record the branch ptr and the address associated to it if this is not a bool array
1136 if (!std::is_same<bool, T>::value) {
1137 branch = outputBranch;
1138 branchAddress = GetData(*ab);
1139 }
1140}
1141
1142// generic version, no-op
1143template <typename T>
1144void UpdateBoolArray(BoolArrayMap &, T&, const std::string &, TTree &) {}
1145
1146// RVec<bool> overload, update boolArrays if needed
1147inline void UpdateBoolArray(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName, TTree &t)
1148{
1149 // in case the RVec<bool> does not correspond to a bool C-array
1150 if (boolArrays.find(outName) == boolArrays.end())
1151 return;
1152
1153 if (v.size() > boolArrays[outName].Size()) {
1154 boolArrays[outName] = BoolArray(v); // resize and copy
1155 t.SetBranchAddress(outName.c_str(), boolArrays[outName].Data());
1156 }
1157 else {
1158 std::copy(v.begin(), v.end(), boolArrays[outName].Data()); // just copy
1159 }
1160}
1161
1162void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName);
1163
1164/// Helper object for a single-thread Snapshot action
1165template <typename... BranchTypes>
1166class SnapshotHelper : public RActionImpl<SnapshotHelper<BranchTypes...>> {
1167 const std::string fFileName;
1168 const std::string fDirName;
1169 const std::string fTreeName;
1170 const RSnapshotOptions fOptions;
1171 std::unique_ptr<TFile> fOutputFile;
1172 std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1173 bool fIsFirstEvent{true};
1174 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1175 const ColumnNames_t fOutputBranchNames;
1176 TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1177 BoolArrayMap fBoolArrays; // Storage for C arrays of bools to be written out
1178 std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1179 std::vector<void *> fBranchAddresses; // Addresses associated to output branches, non-null only for the ones holding C arrays
1180
1181public:
1182 using ColumnTypes_t = TypeList<BranchTypes...>;
1183 SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1184 const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options)
1185 : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1186 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1187 fBranchAddresses(vbnames.size(), nullptr)
1188 {
1189 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1190 }
1191
1192 SnapshotHelper(const SnapshotHelper &) = delete;
1193 SnapshotHelper(SnapshotHelper &&) = default;
1194
1195 void InitTask(TTreeReader *r, unsigned int /* slot */)
1196 {
1197 if (!r) // empty source, nothing to do
1198 return;
1199 fInputTree = r->GetTree();
1200 // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1201 // addresses of the branch values
1202 fInputTree->AddClone(fOutputTree.get());
1203 }
1204
1205 void Exec(unsigned int /* slot */, BranchTypes &... values)
1206 {
1207 using ind_t = std::index_sequence_for<BranchTypes...>;
1208 if (! fIsFirstEvent) {
1209 UpdateCArraysPtrs(values..., ind_t{});
1210 } else {
1211 SetBranches(values..., ind_t{});
1212 fIsFirstEvent = false;
1213 }
1214 UpdateBoolArrays(values..., ind_t{});
1215 fOutputTree->Fill();
1216 }
1217
1218 template <std::size_t... S>
1219 void UpdateCArraysPtrs(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1220 {
1221 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1222 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1223 // leaving associated to the branch of the output tree an invalid pointer.
1224 // With this code, we set the value of the pointer in the output branch anew when needed.
1225 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1226 // we need an int for the expander list.
1227 int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1228 ? fBranches[S]->SetAddress(GetData(values)),
1229 fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1230 0};
1231 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1232 }
1233
1234 template <std::size_t... S>
1235 void SetBranches(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1236 {
1237 // create branches in output tree (and fill fBoolArrays for RVec<bool> columns)
1238 int expander[] = {(SetBranchesHelper(fBoolArrays, fInputTree, *fOutputTree, fInputBranchNames[S],
1239 fOutputBranchNames[S], fBranches[S], fBranchAddresses[S], &values),
1240 0)...,
1241 0};
1242 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1243 }
1244
1245 template <std::size_t... S>
1246 void UpdateBoolArrays(BranchTypes &...values, std::index_sequence<S...> /*dummy*/)
1247 {
1248 int expander[] = {(UpdateBoolArray(fBoolArrays, values, fOutputBranchNames[S], *fOutputTree), 0)..., 0};
1249 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1250 }
1251
1252 void Initialize()
1253 {
1254 fOutputFile.reset(
1255 TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1257
1258 TDirectory *outputDir = fOutputFile.get();
1259 if (!fDirName.empty()) {
1260 TString checkupdate = fOptions.fMode;
1261 checkupdate.ToLower();
1262 if (checkupdate == "update")
1263 outputDir = fOutputFile->mkdir(fDirName.c_str(), "", true); // do not overwrite existing directory
1264 else
1265 outputDir = fOutputFile->mkdir(fDirName.c_str());
1266 }
1267
1268 fOutputTree =
1269 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/outputDir);
1270
1271 if (fOptions.fAutoFlush)
1272 fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1273 }
1274
1275 void Finalize()
1276 {
1277 if (fOutputFile && fOutputTree) {
1278 ::TDirectory::TContext ctxt(fOutputFile->GetDirectory(fDirName.c_str()));
1279 fOutputTree->Write();
1280 // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1281 fOutputTree.reset();
1282 fOutputFile->Close();
1283 } else {
1284 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1285 }
1286 }
1287
1288 std::string GetActionName() { return "Snapshot"; }
1289};
1290
1291/// Helper object for a multi-thread Snapshot action
1292template <typename... BranchTypes>
1293class SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<BranchTypes...>> {
1294 const unsigned int fNSlots;
1295 std::unique_ptr<ROOT::Experimental::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1296 std::vector<std::shared_ptr<ROOT::Experimental::TBufferMergerFile>> fOutputFiles;
1297 std::vector<std::unique_ptr<TTree>> fOutputTrees;
1298 std::vector<int> fIsFirstEvent; // vector<bool> does not allow concurrent writing of different elements
1299 const std::string fFileName; // name of the output file name
1300 const std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1301 const std::string fTreeName; // name of output tree
1302 const RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1303 const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1304 const ColumnNames_t fOutputBranchNames;
1305 std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1306 std::vector<BoolArrayMap> fBoolArrays; // Per-thread storage for C arrays of bools to be written out
1307 // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1308 std::vector<std::vector<TBranch *>> fBranches;
1309 // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1310 std::vector<std::vector<void *>> fBranchAddresses;
1311
1312public:
1313 using ColumnTypes_t = TypeList<BranchTypes...>;
1314 SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1315 std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1316 const RSnapshotOptions &options)
1317 : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fIsFirstEvent(fNSlots, 1), fFileName(filename),
1318 fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1319 fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots), fBoolArrays(fNSlots),
1320 fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1321 fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr))
1322 {
1323 ValidateSnapshotOutput(fOptions, fTreeName, fFileName);
1324 }
1325 SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1326 SnapshotHelperMT(SnapshotHelperMT &&) = default;
1327
1328 void InitTask(TTreeReader *r, unsigned int slot)
1329 {
1330 ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1331 if (!fOutputFiles[slot]) {
1332 // first time this thread executes something, let's create a TBufferMerger output directory
1333 fOutputFiles[slot] = fMerger->GetFile();
1334 }
1335 TDirectory *treeDirectory = fOutputFiles[slot].get();
1336 if (!fDirName.empty()) {
1337 // call returnExistingDirectory=true since MT can end up making this call multiple times
1338 treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1339 }
1340 // re-create output tree as we need to create its branches again, with new input variables
1341 // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1342 fOutputTrees[slot] =
1343 std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1344 fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
1345 // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1346 fOutputTrees[slot]->SetImplicitMT(false);
1347 if (fOptions.fAutoFlush)
1348 fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1349 if (r) {
1350 // not an empty-source RDF
1351 fInputTrees[slot] = r->GetTree();
1352 // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1353 // addresses of the branch values. We need this in case of friend trees with different cluster granularity
1354 // than the main tree.
1355 // FIXME: AddClone might result in many many (safe) warnings printed by TTree::CopyAddresses, see ROOT-9487.
1356 const auto friendsListPtr = fInputTrees[slot]->GetListOfFriends();
1357 if (friendsListPtr && friendsListPtr->GetEntries() > 0)
1358 fInputTrees[slot]->AddClone(fOutputTrees[slot].get());
1359 }
1360 fIsFirstEvent[slot] = 1; // reset first event flag for this slot
1361 }
1362
1363 void FinalizeTask(unsigned int slot)
1364 {
1365 if (fOutputTrees[slot]->GetEntries() > 0)
1366 fOutputFiles[slot]->Write();
1367 // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1368 fOutputTrees[slot].reset(nullptr);
1369 }
1370
1371 void Exec(unsigned int slot, BranchTypes &... values)
1372 {
1373 using ind_t = std::index_sequence_for<BranchTypes...>;
1374 if (!fIsFirstEvent[slot]) {
1375 UpdateCArraysPtrs(slot, values..., ind_t{});
1376 } else {
1377 SetBranches(slot, values..., ind_t{});
1378 fIsFirstEvent[slot] = 0;
1379 }
1380 UpdateBoolArrays(slot, values..., ind_t{});
1381 fOutputTrees[slot]->Fill();
1382 auto entries = fOutputTrees[slot]->GetEntries();
1383 auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1384 if ((autoFlush > 0) && (entries % autoFlush == 0))
1385 fOutputFiles[slot]->Write();
1386 }
1387
1388 template <std::size_t... S>
1389 void UpdateCArraysPtrs(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1390 {
1391 // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1392 // associated to those is re-allocated. As a result the value of the pointer can change therewith
1393 // leaving associated to the branch of the output tree an invalid pointer.
1394 // With this code, we set the value of the pointer in the output branch anew when needed.
1395 // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1396 // we need an int for the expander list.
1397 (void)slot; // avoid bogus 'unused parameter' warning
1398 int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1399 ? fBranches[slot][S]->SetAddress(GetData(values)),
1400 fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1401 0};
1402 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1403 }
1404
1405 template <std::size_t... S>
1406 void SetBranches(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1407 {
1408 // hack to call TTree::Branch on all variadic template arguments
1409 int expander[] = {
1410 (SetBranchesHelper(fBoolArrays[slot], fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1411 fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S], &values),
1412 0)...,
1413 0};
1414 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1415 (void)slot; // avoid unused variable warnings in gcc6.2
1416 }
1417
1418 template <std::size_t... S>
1419 void UpdateBoolArrays(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1420 {
1421 (void)slot; // avoid bogus 'unused parameter' warning
1422 int expander[] = {
1423 (UpdateBoolArray(fBoolArrays[slot], values, fOutputBranchNames[S], *fOutputTrees[slot]), 0)..., 0};
1424 (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1425 }
1426
1427 void Initialize()
1428 {
1429 const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1430 fMerger = std::make_unique<ROOT::Experimental::TBufferMerger>(fFileName.c_str(), fOptions.fMode.c_str(), cs);
1431 }
1432
1433 void Finalize()
1434 {
1435 auto fileWritten = false;
1436 for (auto &file : fOutputFiles) {
1437 if (file) {
1438 file->Write();
1439 file->Close();
1440 fileWritten = true;
1441 }
1442 }
1443
1444 if (!fileWritten) {
1445 Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1446 }
1447
1448 // flush all buffers to disk by destroying the TBufferMerger
1449 fOutputFiles.clear();
1450 fMerger.reset();
1451 }
1452
1453 std::string GetActionName() { return "Snapshot"; }
1454};
1455
1456template <typename Acc, typename Merge, typename R, typename T, typename U,
1457 bool MustCopyAssign = std::is_same<R, U>::value>
1458class AggregateHelper : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1459 Acc fAggregate;
1460 Merge fMerge;
1461 const std::shared_ptr<U> fResult;
1462 Results<U> fAggregators;
1463
1464public:
1465 using ColumnTypes_t = TypeList<T>;
1466 AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1467 : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1468 {
1469 }
1470 AggregateHelper(AggregateHelper &&) = default;
1471 AggregateHelper(const AggregateHelper &) = delete;
1472
1473 void InitTask(TTreeReader *, unsigned int) {}
1474
1476 void Exec(unsigned int slot, const T &value)
1477 {
1478 fAggregators[slot] = fAggregate(fAggregators[slot], value);
1479 }
1480
1482 void Exec(unsigned int slot, const T &value)
1483 {
1484 fAggregate(fAggregators[slot], value);
1485 }
1486
1487 void Initialize() { /* noop */}
1488
1489 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1490 bool MergeAll = std::is_same<void, MergeRet>::value>
1491 typename std::enable_if<MergeAll, void>::type Finalize()
1492 {
1493 fMerge(fAggregators);
1494 *fResult = fAggregators[0];
1495 }
1496
1497 template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1498 bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1499 typename std::enable_if<MergeTwoByTwo, void>::type Finalize(...) // ... needed to let compiler distinguish overloads
1500 {
1501 for (const auto &acc : fAggregators)
1502 *fResult = fMerge(*fResult, acc);
1503 }
1504
1505 U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1506
1507 std::string GetActionName() { return "Aggregate"; }
1508};
1509
1510} // end of NS RDF
1511} // end of NS Internal
1512} // end of NS ROOT
1513
1514/// \endcond
1515
1516#endif
PyObject * fCallable
Definition: CPPOverload.cxx:41
double
Definition: Converters.cxx:921
size_t fSize
Handle_t Display_t
Definition: GuiTypes.h:26
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define b(i)
Definition: RSha256.hxx:100
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define g(i)
Definition: RSha256.hxx:105
#define h(i)
Definition: RSha256.hxx:106
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
static const double x2[5]
static const double x1[5]
static const double x3[11]
unsigned long long ULong64_t
Definition: RtypesCore.h:72
void Warning(const char *location, const char *msgfmt,...)
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
Binding & operator=(OUT(*fun)(void))
typedef void((*Func_t)())
This class is the textual representation of the content of a columnar dataset.
Definition: RDisplay.hxx:64
A "std::vector"-like collection of values implementing handy operation to analyse them.
Definition: RVec.hxx:275
iterator end() noexcept
Definition: RVec.hxx:390
const Impl_t & AsVector() const
Definition: RVec.hxx:351
iterator begin() noexcept
Definition: RVec.hxx:387
A TTree is a list of TBranches.
Definition: TBranch.h:91
virtual Int_t GetBasketSize() const
Definition: TBranch.h:215
TClassRef is used to implement a permanent reference to a TClass object.
Definition: TClassRef.h:28
Small helper to keep current directory context.
Definition: TDirectory.h:47
Describe directory structure in memory.
Definition: TDirectory.h:40
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3942
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:614
The TH1 histogram class.
Definition: TH1.h:56
A TLeaf describes individual elements of a TBranch See TBranch structure in TTree.
Definition: TLeaf.h:49
A doubly linked list.
Definition: TList.h:44
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:796
Basic string class.
Definition: TString.h:131
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
A simple, robust and fast interface to read values from ROOT columnar datasets such as TTree,...
Definition: TTreeReader.h:43
A TTree represents a columnar dataset.
Definition: TTree.h:78
virtual TBranch * GetBranch(const char *name)
Return pointer to the branch with the given name in this tree or its friends.
Definition: TTree.cxx:5209
virtual Int_t SetBranchAddress(const char *bname, void *add, TBranch **ptr=0)
Change branch address, dealing with clone trees properly.
Definition: TTree.cxx:8237
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition: TTree.h:348
@ kEntriesReshuffled
If set, signals that this TTree is the output of the processing of another TTree, and the entries are...
Definition: TTree.h:256
void AddClone(TTree *)
Add a cloned tree to our list of trees to be notified whenever we change our branch addresses or when...
Definition: TTree.cxx:1204
RooCmdArg Columns(Int_t ncol)
Double_t x[n]
Definition: legend1.C:17
basic_string_view< char > string_view
#define F(x, y, z)
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition: API.cxx:331
std::vector< std::string > ReplaceDotWithUnderscore(const std::vector< std::string > &columnNames)
Replace occurrences of '.
Definition: RDFUtils.cxx:283
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
char TypeName2ROOTTypeName(const std::string &b)
Convert type name (e.g.
Definition: RDFUtils.cxx:243
double T(double x)
Definition: ChebyshevPol.h:34
ROOT type_traits extensions.
Definition: TypeTraits.hxx:21
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: StringConv.hxx:21
@ kSTLvector
Definition: ESTLType.h:30
int CompressionSettings(RCompressionSetting::EAlgorithm algorithm, int compressionLevel)
RooArgSet S(const RooAbsArg &v1)
ROOT::ESTLType IsSTLCont(std::string_view type)
type : type name: vector<list<classA,allocator>,allocator> result: 0 : not stl container code of cont...
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
Definition: file.py:1
const char * Size
Definition: TXMLSetup.cxx:55
A collection of options to steer the creation of the dataset on file.
int fAutoFlush
AutoFlush value for output tree.
std::string fMode
Mode of creation of output file.
ECAlgo fCompressionAlgorithm
Compression algorithm of output file.
int fSplitLevel
Split level of output tree.
int fCompressionLevel
Compression level of output file.
Lightweight storage for a collection of types.
Definition: TypeTraits.hxx:25
auto * m
Definition: textangle.C:8
auto * l
Definition: textangle.C:4
void ws()
Definition: ws.C:66