Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RHistStats.hxx
Go to the documentation of this file.
1/// \file
2/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
3/// Feedback is welcome!
4
5#ifndef ROOT_RHistStats
6#define ROOT_RHistStats
7
8#include "RHistUtils.hxx"
10#include "RWeight.hxx"
11
12#include <cmath>
13#include <cstdint>
14#include <stdexcept>
15#include <tuple>
16#include <vector>
17
18class TBuffer;
19
20namespace ROOT {
21namespace Experimental {
22
23/**
24Histogram statistics of unbinned values.
25
26Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the
27arithmetic mean and other statistical quantities per dimension:
28\code
29ROOT::Experimental::RHistStats stats(1);
30stats.Fill(8.5);
31stats.Fill(1.5);
32// stats.ComputeMean() will return 5.0
33\endcode
34
35\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
36Feedback is welcome!
37*/
39public:
40 /// Statistics for one dimension.
42 double fSumWX = 0.0;
43 double fSumWX2 = 0.0;
44 double fSumWX3 = 0.0;
45 double fSumWX4 = 0.0;
46
47 void Add(double x)
48 {
49 fSumWX += x;
50 fSumWX2 += x * x;
51 fSumWX3 += x * x * x;
52 fSumWX4 += x * x * x * x;
53 }
54
55 void Add(double x, double w)
56 {
57 fSumWX += w * x;
58 fSumWX2 += w * x * x;
59 fSumWX3 += w * x * x * x;
60 fSumWX4 += w * x * x * x * x;
61 }
62
64 {
65 fSumWX += other.fSumWX;
66 fSumWX2 += other.fSumWX2;
67 fSumWX3 += other.fSumWX3;
68 fSumWX4 += other.fSumWX4;
69 }
70
71 /// Add another statistics object using atomic instructions.
72 ///
73 /// \param[in] other another statistics object that must not be modified during the operation
81
82 void Clear()
83 {
84 fSumWX = 0.0;
85 fSumWX2 = 0.0;
86 fSumWX3 = 0.0;
87 fSumWX4 = 0.0;
88 }
89
90 void Scale(double factor)
91 {
92 fSumWX *= factor;
93 fSumWX2 *= factor;
94 fSumWX3 *= factor;
95 fSumWX4 *= factor;
96 }
97 };
98
99private:
100 /// The number of entries
101 std::uint64_t fNEntries = 0;
102 /// The sum of weights
103 double fSumW = 0.0;
104 /// The sum of weights squared
105 double fSumW2 = 0.0;
106 /// The sums per dimension
107 std::vector<RDimensionStats> fDimensionStats;
108
109public:
110 /// Construct a statistics object.
111 ///
112 /// \param[in] nDimensions the number of dimensions, must be > 0
113 explicit RHistStats(std::size_t nDimensions)
114 {
115 if (nDimensions == 0) {
116 throw std::invalid_argument("nDimensions must be > 0");
117 }
119 }
120
121 std::size_t GetNDimensions() const { return fDimensionStats.size(); }
122
123 std::uint64_t GetNEntries() const { return fNEntries; }
124 double GetSumW() const { return fSumW; }
125 double GetSumW2() const { return fSumW2; }
126
127 const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const { return fDimensionStats.at(dim); }
128
129 /// Add all entries from another statistics object.
130 ///
131 /// Throws an exception if the number of dimensions are not identical.
132 ///
133 /// \param[in] other another statistics object
134 void Add(const RHistStats &other)
135 {
136 if (fDimensionStats.size() != other.fDimensionStats.size()) {
137 throw std::invalid_argument("number of dimensions not identical in Add");
138 }
139 fNEntries += other.fNEntries;
140 fSumW += other.fSumW;
141 fSumW2 += other.fSumW2;
142 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
143 fDimensionStats[i].Add(other.fDimensionStats[i]);
144 }
145 }
146
147 /// Add all entries from another statistics object using atomic instructions.
148 ///
149 /// Throws an exception if the number of dimensions are not identical.
150 ///
151 /// \param[in] other another statistics object that must not be modified during the operation
153 {
154 if (fDimensionStats.size() != other.fDimensionStats.size()) {
155 throw std::invalid_argument("number of dimensions not identical in Add");
156 }
160 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
161 fDimensionStats[i].AddAtomic(other.fDimensionStats[i]);
162 }
163 }
164
165 /// Clear this statistics object.
166 void Clear()
167 {
168 fNEntries = 0;
169 fSumW = 0;
170 fSumW2 = 0;
171 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
172 fDimensionStats[i].Clear();
173 }
174 }
175
176 /// Compute the number of effective entries.
177 ///
178 /// \f[
179 /// \frac{(\sum w_i)^2}{\sum w_i^2}
180 /// \f]
181 ///
182 /// \return the number of effective entries
184 {
185 if (fSumW2 == 0) {
186 return 0;
187 }
188 return fSumW * fSumW / fSumW2;
189 }
190
191 /// Compute the arithmetic mean of unbinned values.
192 ///
193 /// \f[
194 /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i}
195 /// \f]
196 ///
197 /// \param[in] dim the dimension index, starting at 0
198 /// \return the arithmetic mean of unbinned values
199 double ComputeMean(std::size_t dim = 0) const
200 {
201 // First get the statistics, which includes checking the argument.
202 auto &stats = fDimensionStats.at(dim);
203 if (fSumW == 0) {
204 return 0;
205 }
206 return stats.fSumWX / fSumW;
207 }
208
209 /// Compute the variance of unbinned values.
210 ///
211 /// This function computes the uncorrected sample variance:
212 /// \f[
213 /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2
214 /// \f]
215 /// With some rewriting, this is equivalent to:
216 /// \f[
217 /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2
218 /// \f]
219 ///
220 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
221 /// In other words, the return value is a biased estimation lower than the actual population variance.
222 ///
223 /// \param[in] dim the dimension index, starting at 0
224 /// \return the variance of unbinned values
225 double ComputeVariance(std::size_t dim = 0) const
226 {
227 // First get the statistics, which includes checking the argument.
228 auto &stats = fDimensionStats.at(dim);
229 if (fSumW == 0) {
230 return 0;
231 }
232 double mean = ComputeMean(dim);
233 return stats.fSumWX2 / fSumW - mean * mean;
234 }
235
236 /// Compute the standard deviation of unbinned values.
237 ///
238 /// This function computes the uncorrected sample standard deviation:
239 /// \f[
240 /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2}
241 /// \f]
242 /// With some rewriting, this is equivalent to:
243 /// \f[
244 /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}}
245 /// \f]
246 ///
247 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
248 /// In other words, the return value is a biased estimation lower than the actual population standard deviation.
249 ///
250 /// \param[in] dim the dimension index, starting at 0
251 /// \return the standard deviation of unbinned values
252 double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
253
254 // clang-format off
255 /// Compute the skewness of unbinned values.
256 ///
257 /// The skewness is the third standardized moment:
258 /// \f[
259 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
260 /// \f]
261 /// With support for weighted filling and after some rewriting, it is computed as:
262 /// \f[
263 /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
264 /// \f]
265 ///
266 /// \param[in] dim the dimension index, starting at 0
267 /// \return the skewness of unbinned values
268 // clang-format on
269 double ComputeSkewness(std::size_t dim = 0) const
270 {
271 // First get the statistics, which includes checking the argument.
272 auto &stats = fDimensionStats.at(dim);
273 if (fSumW == 0) {
274 return 0;
275 }
276 double mean = ComputeMean(dim);
277 double var = ComputeVariance(dim);
278 if (var == 0) {
279 return 0;
280 }
281 double EWX3 = stats.fSumWX3 / fSumW;
282 double EWX2 = stats.fSumWX2 / fSumW;
283 return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
284 }
285
286 // clang-format off
287 /// Compute the (excess) kurtosis of unbinned values.
288 ///
289 /// The kurtosis is based on the fourth standardized moment:
290 /// \f[
291 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
292 /// \f]
293 /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
294 /// \f[
295 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
296 /// \f]
297 ///
298 /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
299 /// \f[
300 /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
301 /// \f]
302 ///
303 /// \param[in] dim the dimension index, starting at 0
304 /// \return the (excess) kurtosis of unbinned values
305 // clang-format on
306 double ComputeKurtosis(std::size_t dim = 0) const
307 {
308 // First get the statistics, which includes checking the argument.
309 auto &stats = fDimensionStats.at(dim);
310 if (fSumW == 0) {
311 return 0;
312 }
313 double mean = ComputeMean(dim);
314 double var = ComputeVariance(dim);
315 if (var == 0) {
316 return 0;
317 }
318 double EWX4 = stats.fSumWX4 / fSumW;
319 double EWX3 = stats.fSumWX3 / fSumW;
320 double EWX2 = stats.fSumWX2 / fSumW;
321 return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
322 }
323
324private:
325 template <std::size_t I, typename... A>
326 void FillImpl(const std::tuple<A...> &args)
327 {
328 fDimensionStats[I].Add(std::get<I>(args));
329 if constexpr (I + 1 < sizeof...(A)) {
330 FillImpl<I + 1>(args);
331 }
332 }
333
334 template <std::size_t I, std::size_t N, typename... A>
335 void FillImpl(const std::tuple<A...> &args, double w)
336 {
337 fDimensionStats[I].Add(std::get<I>(args), w);
338 if constexpr (I + 1 < N) {
339 FillImpl<I + 1, N>(args, w);
340 }
341 }
342
343public:
344 /// Fill an entry into this statistics object.
345 ///
346 /// \code
347 /// ROOT::Experimental::RHistStats stats(2);
348 /// auto args = std::make_tuple(8.5, 10.5);
349 /// stats.Fill(args);
350 /// \endcode
351 ///
352 /// Throws an exception if the number of arguments does not match the number of dimensions.
353 ///
354 /// \param[in] args the arguments for each dimension
355 /// \par See also
356 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
357 /// \ref Fill(const std::tuple<A...> &args, RWeight weight) "overload for weighted filling"
358 template <typename... A>
359 void Fill(const std::tuple<A...> &args)
360 {
361 if (sizeof...(A) != fDimensionStats.size()) {
362 throw std::invalid_argument("invalid number of arguments to Fill");
363 }
364 fNEntries++;
365 fSumW++;
366 fSumW2++;
367 FillImpl<0>(args);
368 }
369
370 /// Fill an entry into this statistics object with a weight.
371 ///
372 /// \code
373 /// ROOT::Experimental::RHistStats stats(2);
374 /// auto args = std::make_tuple(8.5, 10.5);
375 /// stats.Fill(args, ROOT::Experimental::RWeight(0.8));
376 /// \endcode
377 ///
378 /// \param[in] args the arguments for each dimension
379 /// \param[in] weight the weight for this entry
380 /// \par See also
381 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
382 /// \ref Fill(const std::tuple<A...> &args) "overload for unweighted filling"
383 template <typename... A>
384 void Fill(const std::tuple<A...> &args, RWeight weight)
385 {
386 if (sizeof...(A) != fDimensionStats.size()) {
387 throw std::invalid_argument("invalid number of arguments to Fill");
388 }
389 fNEntries++;
390 double w = weight.fValue;
391 fSumW += w;
392 fSumW2 += w * w;
393 FillImpl<0, sizeof...(A)>(args, w);
394 }
395
396 /// Fill an entry into this statistics object.
397 ///
398 /// \code
399 /// ROOT::Experimental::RHistStats stats(2);
400 /// stats.Fill(8.5, 10.5);
401 /// \endcode
402 /// For weighted filling, pass an RWeight as the last argument:
403 /// \code
404 /// ROOT::Experimental::RHistStats stats(2);
405 /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8));
406 /// \endcode
407 ///
408 /// Throws an exception if the number of arguments does not match the number of dimensions.
409 ///
410 /// \param[in] args the arguments for each dimension
411 /// \par See also
412 /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple<A...> &args) "for unweighted filling"
413 /// and \ref Fill(const std::tuple<A...> &args, RWeight) "for weighted filling"
414 template <typename... A>
415 void Fill(const A &...args)
416 {
417 auto t = std::forward_as_tuple(args...);
418 if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
419 static constexpr std::size_t N = sizeof...(A) - 1;
420 if (N != fDimensionStats.size()) {
421 throw std::invalid_argument("invalid number of arguments to Fill");
422 }
423 fNEntries++;
424 double w = std::get<N>(t).fValue;
425 fSumW += w;
426 fSumW2 += w * w;
427 FillImpl<0, N>(t, w);
428 } else {
429 Fill(t);
430 }
431 }
432
433 /// Scale the histogram statistics.
434 ///
435 /// \param[in] factor the scale factor
436 void Scale(double factor)
437 {
438 fSumW *= factor;
439 fSumW2 *= factor * factor;
440 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
441 fDimensionStats[i].Scale(factor);
442 }
443 }
444
445 /// %ROOT Streamer function to throw when trying to store an object of this class.
446 void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); }
447};
448
449} // namespace Experimental
450} // namespace ROOT
451
452#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Histogram statistics of unbinned values.
void Add(const RHistStats &other)
Add all entries from another statistics object.
void Fill(const std::tuple< A... > &args, RWeight weight)
Fill an entry into this statistics object with a weight.
void FillImpl(const std::tuple< A... > &args)
double ComputeSkewness(std::size_t dim=0) const
Compute the skewness of unbinned values.
void Fill(const A &...args)
Fill an entry into this statistics object.
std::vector< RDimensionStats > fDimensionStats
The sums per dimension.
void Clear()
Clear this statistics object.
void FillImpl(const std::tuple< A... > &args, double w)
void AddAtomic(const RHistStats &other)
Add all entries from another statistics object using atomic instructions.
double fSumW
The sum of weights.
std::size_t GetNDimensions() const
const RDimensionStats & GetDimensionStats(std::size_t dim=0) const
double fSumW2
The sum of weights squared.
void Scale(double factor)
Scale the histogram statistics.
void Fill(const std::tuple< A... > &args)
Fill an entry into this statistics object.
double ComputeNEffectiveEntries() const
Compute the number of effective entries.
double ComputeVariance(std::size_t dim=0) const
Compute the variance of unbinned values.
void Streamer(TBuffer &)
ROOT Streamer function to throw when trying to store an object of this class.
std::uint64_t GetNEntries() const
double ComputeMean(std::size_t dim=0) const
Compute the arithmetic mean of unbinned values.
double ComputeKurtosis(std::size_t dim=0) const
Compute the (excess) kurtosis of unbinned values.
RHistStats(std::size_t nDimensions)
Construct a statistics object.
std::uint64_t fNEntries
The number of entries.
double ComputeStdDev(std::size_t dim=0) const
Compute the standard deviation of unbinned values.
Buffer base class used for serializing objects.
Definition TBuffer.h:43
Double_t x[n]
Definition legend1.C:17
#define I(x, y, z)
std::enable_if_t< std::is_integral_v< T > > AtomicAdd(T *ptr, T val)
void AddAtomic(const RDimensionStats &other)
Add another statistics object using atomic instructions.
void Add(const RDimensionStats &other)
A weight for filling histograms.
Definition RWeight.hxx:17