Logo ROOT  
Reference Guide
RDFActionHelpers.cxx
Go to the documentation of this file.
1// Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2
3/*************************************************************************
4 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5 * All rights reserved. *
6 * *
7 * For the licensing terms see $ROOTSYS/LICENSE. *
8 * For the list of contributors see $ROOTSYS/README/CREDITS. *
9 *************************************************************************/
10
12#include "ROOT/RDF/Utils.hxx" // CacheLineStep
13
14namespace ROOT {
15namespace Internal {
16namespace RDF {
17
18CountHelper::CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots)
19 : fResultCount(resultCount), fCounts(nSlots, 0)
20{
21}
22
23void CountHelper::Exec(unsigned int slot)
24{
25 fCounts[slot]++;
26}
27
28void CountHelper::Finalize()
29{
30 *fResultCount = 0;
31 for (auto &c : fCounts) {
32 *fResultCount += c;
33 }
34}
35
36ULong64_t &CountHelper::PartialUpdate(unsigned int slot)
37{
38 return fCounts[slot];
39}
40
41void BufferedFillHelper::UpdateMinMax(unsigned int slot, double v)
42{
43 auto &thisMin = fMin[slot * CacheLineStep<BufEl_t>()];
44 auto &thisMax = fMax[slot * CacheLineStep<BufEl_t>()];
45 thisMin = std::min(thisMin, v);
46 thisMax = std::max(thisMax, v);
47}
48
49BufferedFillHelper::BufferedFillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots)
50 : fResultHist(h), fNSlots(nSlots), fBufSize(fgTotalBufSize / nSlots), fPartialHists(fNSlots),
51 fMin(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::max()),
52 fMax(nSlots * CacheLineStep<BufEl_t>(), std::numeric_limits<BufEl_t>::lowest())
53{
54 fBuffers.reserve(fNSlots);
55 fWBuffers.reserve(fNSlots);
56 for (unsigned int i = 0; i < fNSlots; ++i) {
57 Buf_t v;
58 v.reserve(fBufSize);
59 fBuffers.emplace_back(v);
60 fWBuffers.emplace_back(v);
61 }
62}
63
64void BufferedFillHelper::Exec(unsigned int slot, double v)
65{
66 UpdateMinMax(slot, v);
67 fBuffers[slot].emplace_back(v);
68}
69
70void BufferedFillHelper::Exec(unsigned int slot, double v, double w)
71{
72 UpdateMinMax(slot, v);
73 fBuffers[slot].emplace_back(v);
74 fWBuffers[slot].emplace_back(w);
75}
76
77Hist_t &BufferedFillHelper::PartialUpdate(unsigned int slot)
78{
79 auto &partialHist = fPartialHists[slot];
80 // TODO it is inefficient to re-create the partial histogram everytime the callback is called
81 // ideally we could incrementally fill it with the latest entries in the buffers
82 partialHist = std::make_unique<Hist_t>(*fResultHist);
83 auto weights = fWBuffers[slot].empty() ? nullptr : fWBuffers[slot].data();
84 partialHist->FillN(fBuffers[slot].size(), fBuffers[slot].data(), weights);
85 return *partialHist;
86}
87
88void BufferedFillHelper::Finalize()
89{
90 for (unsigned int i = 0; i < fNSlots; ++i) {
91 if (!fWBuffers[i].empty() && fBuffers[i].size() != fWBuffers[i].size()) {
92 throw std::runtime_error("Cannot fill weighted histogram with values in containers of different sizes.");
93 }
94 }
95
96 BufEl_t globalMin = *std::min_element(fMin.begin(), fMin.end());
97 BufEl_t globalMax = *std::max_element(fMax.begin(), fMax.end());
98
99 if (fResultHist->CanExtendAllAxes() && globalMin != std::numeric_limits<BufEl_t>::max() &&
100 globalMax != std::numeric_limits<BufEl_t>::lowest()) {
101 fResultHist->SetBins(fResultHist->GetNbinsX(), globalMin, globalMax);
102 }
103
104 for (unsigned int i = 0; i < fNSlots; ++i) {
105 auto weights = fWBuffers[i].empty() ? nullptr : fWBuffers[i].data();
106 fResultHist->FillN(fBuffers[i].size(), fBuffers[i].data(), weights);
107 }
108}
109
110template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &);
111template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &);
112template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &);
113template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &);
114template void BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
115template void BufferedFillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
116template void BufferedFillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
117template void BufferedFillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
118template void BufferedFillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
119template void
120BufferedFillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
121
122// TODO
123// template void MinHelper::Exec(unsigned int, const std::vector<float> &);
124// template void MinHelper::Exec(unsigned int, const std::vector<double> &);
125// template void MinHelper::Exec(unsigned int, const std::vector<char> &);
126// template void MinHelper::Exec(unsigned int, const std::vector<int> &);
127// template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
128
129// template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
130// template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
131// template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
132// template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
133// template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
134
135MeanHelper::MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
136 : fResultMean(meanVPtr), fCounts(nSlots, 0), fSums(nSlots, 0), fPartialMeans(nSlots), fCompensations(nSlots)
137{
138}
139
140void MeanHelper::Exec(unsigned int slot, double v)
141{
142 fCounts[slot]++;
143 // Kahan Sum:
144 double y = v - fCompensations[slot];
145 double t = fSums[slot] + y;
146 fCompensations[slot] = (t - fSums[slot]) - y;
147 fSums[slot] = t;
148}
149
150void MeanHelper::Finalize()
151{
152 double sumOfSums = 0;
153 // Kahan Sum:
154 double compensation(0);
155 double y(0);
156 double t(0);
157 for (auto &m : fSums) {
158 y = m - compensation;
159 t = sumOfSums + y;
160 compensation = (t - sumOfSums) - y;
161 sumOfSums = t;
162 }
163 ULong64_t sumOfCounts = 0;
164 for (auto &c : fCounts)
165 sumOfCounts += c;
166 *fResultMean = sumOfSums / (sumOfCounts > 0 ? sumOfCounts : 1);
167}
168
169double &MeanHelper::PartialUpdate(unsigned int slot)
170{
171 fPartialMeans[slot] = fSums[slot] / fCounts[slot];
172 return fPartialMeans[slot];
173}
174
175template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
176template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
177template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
178template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
179template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
180
181StdDevHelper::StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots)
182 : fNSlots(nSlots), fResultStdDev(meanVPtr), fCounts(nSlots, 0), fMeans(nSlots, 0), fDistancesfromMean(nSlots, 0)
183{
184}
185
186void StdDevHelper::Exec(unsigned int slot, double v)
187{
188 // Applies the Welford's algorithm to the stream of values received by the thread
189 auto count = ++fCounts[slot];
190 auto delta = v - fMeans[slot];
191 auto mean = fMeans[slot] + delta / count;
192 auto delta2 = v - mean;
193 auto distance = fDistancesfromMean[slot] + delta * delta2;
194
195 fCounts[slot] = count;
196 fMeans[slot] = mean;
197 fDistancesfromMean[slot] = distance;
198}
199
200void StdDevHelper::Finalize()
201{
202 // Evaluates and merges the partial result of each set of data to get the overall standard deviation.
203 double totalElements = 0;
204 for (auto c : fCounts) {
205 totalElements += c;
206 }
207 if (totalElements == 0 || totalElements == 1) {
208 // Std deviation is not defined for 1 element.
209 *fResultStdDev = 0;
210 return;
211 }
212
213 double overallMean = 0;
214 for (unsigned int i = 0; i < fNSlots; ++i) {
215 overallMean += fCounts[i] * fMeans[i];
216 }
217 overallMean = overallMean / totalElements;
218
219 double variance = 0;
220 for (unsigned int i = 0; i < fNSlots; ++i) {
221 if (fCounts[i] == 0) {
222 continue;
223 }
224 auto setVariance = fDistancesfromMean[i] / (fCounts[i]);
225 variance += (fCounts[i]) * (setVariance + std::pow((fMeans[i] - overallMean), 2));
226 }
227
228 variance = variance / (totalElements - 1);
229 *fResultStdDev = std::sqrt(variance);
230}
231
232template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
233template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
234template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
235template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
236template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
237
238// External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
239#if __GNUC__ > 5
240template class TakeHelper<bool, bool, std::vector<bool>>;
241template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
242template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
243template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
244template class TakeHelper<int, int, std::vector<int>>;
245template class TakeHelper<long, long, std::vector<long>>;
246template class TakeHelper<long long, long long, std::vector<long long>>;
247template class TakeHelper<float, float, std::vector<float>>;
248template class TakeHelper<double, double, std::vector<double>>;
249#endif
250
251void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
252{
253 TString fileMode = opts.fMode;
254 fileMode.ToLower();
255 if (fileMode != "update")
256 return;
257
258 // output file opened in "update" mode: must check whether output TTree is already present in file
259 std::unique_ptr<TFile> outFile{TFile::Open(fileName.c_str(), "update")};
260 if (!outFile || outFile->IsZombie())
261 throw std::invalid_argument("Snapshot: cannot open file \"" + fileName + "\" in update mode");
262
263 TObject *outTree = outFile->Get(treeName.c_str());
264 if (outTree == nullptr)
265 return;
266
267 // object called treeName is already present in the file
268 if (opts.fOverwriteIfExists) {
269 if (outTree->InheritsFrom("TTree")) {
270 static_cast<TTree *>(outTree)->Delete("all");
271 } else {
272 outFile->Delete(treeName.c_str());
273 }
274 } else {
275 const std::string msg = "Snapshot: tree \"" + treeName + "\" already present in file \"" + fileName +
276 "\". If you want to delete the original tree and write another, please set "
277 "RSnapshotOptions::fOverwriteIfExists to true.";
278 throw std::invalid_argument(msg);
279 }
280}
281
282} // end NS RDF
283} // end NS Internal
284} // end NS ROOT
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned long long ULong64_t
Definition: RtypesCore.h:81
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:4019
Mother of all ROOT objects.
Definition: TObject.h:41
virtual Bool_t InheritsFrom(const char *classname) const
Returns kTRUE if object inherits from class "classname".
Definition: TObject.cxx:519
Basic string class.
Definition: TString.h:136
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1155
A TTree represents a columnar dataset.
Definition: TTree.h:79
void Delete(Option_t *option="") override
Delete this tree from memory or/and disk.
Definition: TTree.cxx:3718
RVec< PromoteTypes< T0, T1 > > pow(const T0 &x, const RVec< T1 > &v)
Definition: RVec.hxx:1770
Double_t y[n]
Definition: legend1.C:17
CPYCPPYY_EXTERN bool Exec(const std::string &cmd)
Definition: API.cxx:333
void ValidateSnapshotOutput(const RSnapshotOptions &opts, const std::string &treeName, const std::string &fileName)
constexpr std::size_t CacheLineStep()
Stepping through CacheLineStep<T> values in a vector<T> brings you to a new cache line.
Definition: Utils.hxx:220
VecExpr< UnaryOp< Sqrt< T >, VecExpr< A, T, D >, T >, T, D > sqrt(const VecExpr< A, T, D > &rhs)
This file contains a specialised ROOT message handler to test for diagnostic in unit tests.
A collection of options to steer the creation of the dataset on file.
std::string fMode
Mode of creation of output file.
bool fOverwriteIfExists
If fMode is "UPDATE", overwrite object in output file if it already exists.
TMarker m
Definition: textangle.C:8