Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RDFActionHelpers.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12#include "ROOT/RDF/Utils.hxx" // CacheLineStep
13
14namespace ROOT {
15namespace Internal {
16namespace RDF {
17
18CountHelper::CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots)
19 : fResultCount(resultCount), fCounts(nSlots, 0)
20{
21}
22
23void CountHelper::Exec(unsigned int slot)
24{
25 fCounts[slot]++;
26}
27
28void CountHelper::Finalize()
29{
30 *fResultCount = 0;
31 for (auto &c : fCounts) {
32 *fResultCount += c;
33 }
34}
35
36ULong64_t &CountHelper::PartialUpdate(unsigned int slot)
37{
38 return fCounts[slot];
39}
40
41void FillHelper::UpdateMinMax(unsigned int slot, double v)
42{
43 auto &thisMin = fMin[slot * CacheLineStep<BufEl_t>()];
44 auto &thisMax = fMax[slot * CacheLineStep<BufEl_t>()];
45 thisMin = std::min(thisMin, v);
46 thisMax = std::max(thisMax, v);
47}
48
49FillHelper::FillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots)
50 : fResultHist(h), fNSlots(nSlots), fBufSize(fgTotalBufSize / nSlots), fPartialHists(fNSlots),
51 fMin(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::max()),
52 fMax(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::lowest())
53{
54 fBuffers.reserve(fNSlots);
55 fWBuffers.reserve(fNSlots);
56 for (unsigned int i = 0; i < fNSlots; ++i) {
57 Buf_t v;
58 v.reserve(fBufSize);
59 fBuffers.emplace_back(v);
60 fWBuffers.emplace_back(v);
61 }
62}
63
64void FillHelper::Exec(unsigned int slot, double v)
65{
66 UpdateMinMax(slot, v);
67 fBuffers[slot].emplace_back(v);
68}
69
70void FillHelper::Exec(unsigned int slot, double v, double w)
71{
72 UpdateMinMax(slot, v);
73 fBuffers[slot].emplace_back(v);
74 fWBuffers[slot].emplace_back(w);
75}
76
77Hist_t &FillHelper::PartialUpdate(unsigned int slot)
78{
79 auto &partialHist = fPartialHists[slot];
80 // TODO it is inefficient to re-create the partial histogram everytime the callback is called
81 // ideally we could incrementally fill it with the latest entries in the buffers
82 partialHist = std::make_unique<Hist_t>(*fResultHist);
83 auto weights = fWBuffers[slot].empty() ? nullptr : fWBuffers[slot].data();
84 partialHist->FillN(fBuffers[slot].size(), fBuffers[slot].data(), weights);
85 return *partialHist;
86}
87
88void FillHelper::Finalize()
89{
90 for (unsigned int i = 0; i < fNSlots; ++i) {
91 if (!fWBuffers[i].empty() && fBuffers[i].size() != fWBuffers[i].size()) {
92 throw std::runtime_error("Cannot fill weighted histogram with values in containers of different sizes.");
93 }
94 }
95
96 BufEl_t globalMin = *std::min_element(fMin.begin(), fMin.end());
97 BufEl_t globalMax = *std::max_element(fMax.begin(), fMax.end());
98
99 if (fResultHist->CanExtendAllAxes() && globalMin != std::numeric_limits<BufEl_t>::max() &&
100 globalMax != std::numeric_limits<BufEl_t>::lowest()) {
101 fResultHist->SetBins(fResultHist->GetNbinsX(), globalMin, globalMax);
102 }
103
104 for (unsigned int i = 0; i < fNSlots; ++i) {
105 auto weights = fWBuffers[i].empty() ? nullptr : fWBuffers[i].data();
106 fResultHist->FillN(fBuffers[i].size(), fBuffers[i].data(), weights);
107 }
108}
109
110template void FillHelper::Exec(unsigned int, const std::vector<float> &);
111template void FillHelper::Exec(unsigned int, const std::vector<double> &);
112template void FillHelper::Exec(unsigned int, const std::vector<char> &);
113template void FillHelper::Exec(unsigned int, const std::vector<int> &);
114template void FillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
115template void FillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
116template void FillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
117template void FillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
118template void FillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
119template void FillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
120
121// TODO
122// template void MinHelper::Exec(unsigned int, const std::vector<float> &);
123// template void MinHelper::Exec(unsigned int, const std::vector<double> &);
124// template void MinHelper::Exec(unsigned int, const std::vector<char> &);
125// template void MinHelper::Exec(unsigned int, const std::vector<int> &);
126// template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
127
128// template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
129// template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
130// template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
131// template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
132// template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
133
134MeanHelper::MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
135 : fResultMean(meanVPtr), fCounts(nSlots, 0), fSums(nSlots, 0), fPartialMeans(nSlots)
136{
137}
138
139void MeanHelper::Exec(unsigned int slot, double v)
140{
141 fSums[slot] += v;
142 fCounts[slot]++;
143}
144
145void MeanHelper::Finalize()
146{
147 double sumOfSums = 0;
148 for (auto &s : fSums)
149 sumOfSums += s;
150 ULong64_t sumOfCounts = 0;
151 for (auto &c : fCounts)
152 sumOfCounts += c;
153 *fResultMean = sumOfSums / (sumOfCounts > 0 ? sumOfCounts : 1);
154}
155
156double &MeanHelper::PartialUpdate(unsigned int slot)
157{
158 fPartialMeans[slot] = fSums[slot] / fCounts[slot];
159 return fPartialMeans[slot];
160}
161
162template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
163template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
164template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
165template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
166template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
167
168StdDevHelper::StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
169 : fNSlots(nSlots), fResultStdDev(meanVPtr), fCounts(nSlots, 0), fMeans(nSlots, 0), fDistancesfromMean(nSlots, 0)
170{
171}
172
173void StdDevHelper::Exec(unsigned int slot, double v)
174{
175 // Applies the Welford's algorithm to the stream of values received by the thread
176 auto count = ++fCounts[slot];
177 auto delta = v - fMeans[slot];
178 auto mean = fMeans[slot] + delta / count;
179 auto delta2 = v - mean;
180 auto distance = fDistancesfromMean[slot] + delta * delta2;
181
182 fCounts[slot] = count;
183 fMeans[slot] = mean;
184 fDistancesfromMean[slot] = distance;
185}
186
187void StdDevHelper::Finalize()
188{
189 // Evaluates and merges the partial result of each set of data to get the overall standard deviation.
190 double totalElements = 0;
191 for (auto c : fCounts) {
192 totalElements += c;
193 }
194 if (totalElements == 0 || totalElements == 1) {
195 // Std deviation is not defined for 1 element.
196 *fResultStdDev = 0;
197 return;
198 }
199
200 double overallMean = 0;
201 for (unsigned int i = 0; i < fNSlots; ++i) {
202 overallMean += fCounts[i] * fMeans[i];
203 }
204 overallMean = overallMean / totalElements;
205
206 double variance = 0;
207 for (unsigned int i = 0; i < fNSlots; ++i) {
208 if (fCounts[i] == 0) {
209 continue;
210 }
211 auto setVariance = fDistancesfromMean[i] / (fCounts[i]);
212 variance += (fCounts[i]) * (setVariance + std::pow((fMeans[i] - overallMean), 2));
213 }
214
215 variance = variance / (totalElements - 1);
216 *fResultStdDev = std::sqrt(variance);
217}
218
219template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
220template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
221template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
222template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
223template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
224
225// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
226#if __GNUC__ > 5
227template class TakeHelper<bool, bool, std::vector<bool>>;
228template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
229template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
230template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
231template class TakeHelper<int, int, std::vector<int>>;
232template class TakeHelper<long, long, std::vector<long>>;
233template class TakeHelper<long long, long long, std::vector<long long>>;
234template class TakeHelper<float, float, std::vector<float>>;
235template class TakeHelper<double, double, std::vector<double>>;
236#endif
237
238void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
239{
240 TString fileMode = opts.fMode;
241 fileMode.ToLower();
242 if (fileMode != "update")
243 return;
244
245 // output file opened in "update" mode: must check whether output TTree is already present in file
246 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
247 if (!outFile || outFile->IsZombie())
248 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
249
250 TObject *outTree = outFile->Get(treeName.c_str());
251 if (outTree == nullptr)
252 return;
253
254 // object called treeName is already present in the file
255 if (opts.fOverwriteIfExists) {
256 if (outTree->InheritsFrom("TTree")) {
257 static_cast<TTree *>(outTree)->Delete("all");
258 } else {
259 outFile->Delete(treeName.c_str());
260 }
261 } else {
262 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
263 "\". If you want to delete the original tree and write another, please set "
264 "RSnapshotOptions::fOverwriteIfExists to true.";
265 throw std::invalid_argument(msg);
266 }
267}
268
269} // end NS RDF
270} // end NS Internal
271} // end NS ROOT
#define c(i)
Definition RSha256.hxx:101
#define h(i)
Definition RSha256.hxx:106
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
Definition RtypesCore.h:81
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:4025
Mother of all ROOT objects.
Definition TObject.h:41
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition TObject.cxx:515
Basic string class.
Definition TString.h:136
void ToLower()
Change string to lower-case.
Definition TString.cxx:1150
A TTree represents a columnar dataset.
Definition TTree.h:79
virtual void Delete(Option_t *option="")
Delete this tree from memory or/and disk.
Definition TTree.cxx:3741
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
constexpr std::size_t CacheLineStep()
Stepping through CacheLineStep<T> values in a vector<T> brings you to a new cache line.
Definition Utils.hxx:220
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
A collection of options to steer the creation of the dataset on file.
std::string fMode
Mode of creation of output file.
bool fOverwriteIfExists
If fMode is "UPDATE", overwrite object in output file if it already exists.