Logo ROOT  
Reference Guide
RDFActionHelpers.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12
13namespace ROOT {
14namespace Internal {
15namespace RDF {
16
17CountHelper::CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots)
18 : fResultCount(resultCount), fCounts(nSlots, 0)
19{
20}
21
22void CountHelper::Exec(unsigned int slot)
23{
24 fCounts[slot]++;
25}
26
27void CountHelper::Finalize()
28{
29 *fResultCount = 0;
30 for (auto &c : fCounts) {
31 *fResultCount += c;
32 }
33}
34
35ULong64_t &CountHelper::PartialUpdate(unsigned int slot)
36{
37 return fCounts[slot];
38}
39
40void BufferedFillHelper::UpdateMinMax(unsigned int slot, double v)
41{
42 auto &thisMin = fMin[slot];
43 auto &thisMax = fMax[slot];
44 thisMin = std::min(thisMin, v);
45 thisMax = std::max(thisMax, v);
46}
47
48BufferedFillHelper::BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots)
49 : fResultHist(h), fNSlots(nSlots), fBufSize(fgTotalBufSize / nSlots), fPartialHists(fNSlots),
50 fMin(nSlots, std::numeric_limits<BufEl_t>::max()), fMax(nSlots, std::numeric_limits<BufEl_t>::lowest())
51{
52 fBuffers.reserve(fNSlots);
53 fWBuffers.reserve(fNSlots);
54 for (unsigned int i = 0; i < fNSlots; ++i) {
55 Buf_t v;
56 v.reserve(fBufSize);
57 fBuffers.emplace_back(v);
58 fWBuffers.emplace_back(v);
59 }
60}
61
62void BufferedFillHelper::Exec(unsigned int slot, double v)
63{
64 UpdateMinMax(slot, v);
65 fBuffers[slot].emplace_back(v);
66}
67
68void BufferedFillHelper::Exec(unsigned int slot, double v, double w)
69{
70 UpdateMinMax(slot, v);
71 fBuffers[slot].emplace_back(v);
72 fWBuffers[slot].emplace_back(w);
73}
74
75Hist_t &BufferedFillHelper::PartialUpdate(unsigned int slot)
76{
77 auto &partialHist = fPartialHists[slot];
78 // TODO it is inefficient to re-create the partial histogram everytime the callback is called
79 // ideally we could incrementally fill it with the latest entries in the buffers
80 partialHist = std::make_unique<Hist_t>(*fResultHist);
81 auto weights = fWBuffers[slot].empty() ? nullptr : fWBuffers[slot].data();
82 partialHist->FillN(fBuffers[slot].size(), fBuffers[slot].data(), weights);
83 return *partialHist;
84}
85
86void BufferedFillHelper::Finalize()
87{
88 for (unsigned int i = 0; i < fNSlots; ++i) {
89 if (!fWBuffers[i].empty() && fBuffers[i].size() != fWBuffers[i].size()) {
90 throw std::runtime_error("Cannot fill weighted histogram with values in containers of different sizes.");
91 }
92 }
93
94 BufEl_t globalMin = *std::min_element(fMin.begin(), fMin.end());
95 BufEl_t globalMax = *std::max_element(fMax.begin(), fMax.end());
96
97 if (fResultHist->CanExtendAllAxes() && globalMin != std::numeric_limits<BufEl_t>::max() &&
98 globalMax != std::numeric_limits<BufEl_t>::lowest()) {
99 fResultHist->SetBins(fResultHist->GetNbinsX(), globalMin, globalMax);
100 }
101
102 for (unsigned int i = 0; i < fNSlots; ++i) {
103 auto weights = fWBuffers[i].empty() ? nullptr : fWBuffers[i].data();
104 fResultHist->FillN(fBuffers[i].size(), fBuffers[i].data(), weights);
105 }
106}
107
108template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &);
109template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &);
110template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &);
111template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &);
112template void BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
113template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
114template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
115template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
116template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
117template void
118BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
119
120// TODO
121// template void MinHelper::Exec(unsigned int, const std::vector<float> &);
122// template void MinHelper::Exec(unsigned int, const std::vector<double> &);
123// template void MinHelper::Exec(unsigned int, const std::vector<char> &);
124// template void MinHelper::Exec(unsigned int, const std::vector<int> &);
125// template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
126
127// template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
128// template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
129// template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
130// template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
131// template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
132
133MeanHelper::MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
134 : fResultMean(meanVPtr), fCounts(nSlots, 0), fSums(nSlots, 0), fPartialMeans(nSlots)
135{
136}
137
138void MeanHelper::Exec(unsigned int slot, double v)
139{
140 fSums[slot] += v;
141 fCounts[slot]++;
142}
143
144void MeanHelper::Finalize()
145{
146 double sumOfSums = 0;
147 for (auto &s : fSums)
148 sumOfSums += s;
149 ULong64_t sumOfCounts = 0;
150 for (auto &c : fCounts)
151 sumOfCounts += c;
152 *fResultMean = sumOfSums / (sumOfCounts > 0 ? sumOfCounts : 1);
153}
154
155double &MeanHelper::PartialUpdate(unsigned int slot)
156{
157 fPartialMeans[slot] = fSums[slot] / fCounts[slot];
158 return fPartialMeans[slot];
159}
160
161template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
162template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
163template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
164template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
165template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
166
167StdDevHelper::StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
168 : fNSlots(nSlots), fResultStdDev(meanVPtr), fCounts(nSlots, 0), fMeans(nSlots, 0), fDistancesfromMean(nSlots, 0)
169{
170}
171
172void StdDevHelper::Exec(unsigned int slot, double v)
173{
174 // Applies the Welford's algorithm to the stream of values received by the thread
175 auto count = ++fCounts[slot];
176 auto delta = v - fMeans[slot];
177 auto mean = fMeans[slot] + delta / count;
178 auto delta2 = v - mean;
179 auto distance = fDistancesfromMean[slot] + delta * delta2;
180
181 fCounts[slot] = count;
182 fMeans[slot] = mean;
183 fDistancesfromMean[slot] = distance;
184}
185
186void StdDevHelper::Finalize()
187{
188 // Evaluates and merges the partial result of each set of data to get the overall standard deviation.
189 double totalElements = 0;
190 for (auto c : fCounts) {
191 totalElements += c;
192 }
193 if (totalElements == 0 || totalElements == 1) {
194 // Std deviation is not defined for 1 element.
195 *fResultStdDev = 0;
196 return;
197 }
198
199 double overallMean = 0;
200 for (unsigned int i = 0; i < fNSlots; ++i) {
201 overallMean += fCounts[i] * fMeans[i];
202 }
203 overallMean = overallMean / totalElements;
204
205 double variance = 0;
206 for (unsigned int i = 0; i < fNSlots; ++i) {
207 if (fCounts[i] == 0) {
208 continue;
209 }
210 auto setVariance = fDistancesfromMean[i] / (fCounts[i]);
211 variance += (fCounts[i]) * (setVariance + std::pow((fMeans[i] - overallMean), 2));
212 }
213
214 variance = variance / (totalElements - 1);
215 *fResultStdDev = std::sqrt(variance);
216}
217
218template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
219template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
220template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
221template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
222template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
223
224// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
225#if __GNUC__ > 5
226template class TakeHelper<bool, bool, std::vector<bool>>;
227template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
228template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
229template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
230template class TakeHelper<int, int, std::vector<int>>;
231template class TakeHelper<long, long, std::vector<long>>;
232template class TakeHelper<long long, long long, std::vector<long long>>;
233template class TakeHelper<float, float, std::vector<float>>;
234template class TakeHelper<double, double, std::vector<double>>;
235#endif
236
237void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
238{
239 TString fileMode = opts.fMode;
240 fileMode.ToLower();
241 if (fileMode != "update")
242 return;
243
244 // output file opened in "update" mode: must check whether output TTree is already present in file
245 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
246 if (!outFile || outFile->IsZombie())
247 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
248
249 TObject *outTree = outFile->Get(treeName.c_str());
250 if (outTree == nullptr)
251 return;
252
253 // object called treeName is already present in the file
254 if (opts.fOverwriteIfExists) {
255 if (outTree->InheritsFrom("TTree")) {
256 static_cast<TTree *>(outTree)->Delete("all");
257 } else {
258 outFile->Delete(treeName.c_str());
259 }
260 } else {
261 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
262 "\". If you want to delete the original tree and write another, please set "
263 "RSnapshotOptions::fOverwriteIfExists to true.";
264 throw std::invalid_argument(msg);
265 }
266}
267
268} // end NS RDF
269} // end NS Internal
270} // end NS ROOT
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
Definition: RtypesCore.h:81
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:4019
Mother of all ROOT objects.
Definition: TObject.h:37
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition: TObject.cxx:445
Basic string class.
Definition: TString.h:136
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1150
A TTree represents a columnar dataset.
Definition: TTree.h:79
void Delete(Option_t *option="") override
Delete this tree from memory or/and disk.
Definition: TTree.cxx:3718
RVec< PromoteTypes< T0, T1 > > pow(const T0 &x, const RVec< T1 > &v)
Definition: RVec.hxx:1753
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition: API.cxx:333
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
VecExpr< UnaryOp< Sqrt< T >, VecExpr< A, T, D >, T >, T, D > sqrt(const VecExpr< A, T, D > &rhs)
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
static constexpr double s
A collection of options to steer the creation of the dataset on file.
std::string fMode
Mode of creation of output file.
bool fOverwriteIfExists
If fMode is "UPDATE", overwrite object in output file if it already exists.