Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RHistStats.hxx
Go to the documentation of this file.
1/// \file
2/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
3/// Feedback is welcome!
4
5#ifndef ROOT_RHistStats
6#define ROOT_RHistStats
7
8#include "RHistUtils.hxx"
9#include "RWeight.hxx"
10
11#include <cassert>
12#include <cmath>
13#include <cstddef>
14#include <cstdint>
15#include <limits>
16#include <stdexcept>
17#include <tuple>
18#include <type_traits>
19#include <vector>
20
21class TBuffer;
22
23namespace ROOT {
24namespace Experimental {
25
26/**
27Histogram statistics of unbinned values.
28
29Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the
30arithmetic mean and other statistical quantities per dimension:
31\code
32ROOT::Experimental::RHistStats stats(1);
33stats.Fill(8.5);
34stats.Fill(1.5);
35// stats.ComputeMean() will return 5.0
36\endcode
37
38\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
39Feedback is welcome!
40*/
42public:
43 /// Statistics for one dimension.
45 bool fEnabled = true;
46 double fSumWX = 0.0;
47 double fSumWX2 = 0.0;
48 double fSumWX3 = 0.0;
49 double fSumWX4 = 0.0;
50
51 void Add(double x)
52 {
54 fSumWX += x;
55 fSumWX2 += x * x;
56 fSumWX3 += x * x * x;
57 fSumWX4 += x * x * x * x;
58 }
59
60 void Add(double x, double w)
61 {
63 fSumWX += w * x;
64 fSumWX2 += w * x * x;
65 fSumWX3 += w * x * x * x;
66 fSumWX4 += w * x * x * x * x;
67 }
68
70 {
72 fSumWX += other.fSumWX;
73 fSumWX2 += other.fSumWX2;
74 fSumWX3 += other.fSumWX3;
75 fSumWX4 += other.fSumWX4;
76 }
77
78 /// Add another statistics object using atomic instructions.
79 ///
80 /// \param[in] other another statistics object that must not be modified during the operation
89
90 void Clear()
91 {
92 fSumWX = 0.0;
93 fSumWX2 = 0.0;
94 fSumWX3 = 0.0;
95 fSumWX4 = 0.0;
96 }
97
98 void Scale(double factor)
99 {
101 fSumWX *= factor;
102 fSumWX2 *= factor;
103 fSumWX3 *= factor;
104 fSumWX4 *= factor;
105 }
106 };
107
108private:
109 /// The number of entries
110 std::uint64_t fNEntries = 0;
111 /// The sum of weights
112 double fSumW = 0.0;
113 /// The sum of weights squared
114 double fSumW2 = 0.0;
115 /// The sums per dimension
116 std::vector<RDimensionStats> fDimensionStats;
117 /// Whether this object is tainted, in which case read access will throw. This is used if a user modifies bin
118 /// contents explicitly or slices histograms without preserving all entries, for example.
119 bool fTainted = false;
120
121 void ThrowIfTainted() const
122 {
123 if (fTainted) {
124 throw std::logic_error("statistics are tainted, for example after setting bin contents or slicing");
125 }
126 }
127
128public:
129 /// Construct a statistics object.
130 ///
131 /// \param[in] nDimensions the number of dimensions, must be > 0
132 explicit RHistStats(std::size_t nDimensions)
133 {
134 if (nDimensions == 0) {
135 throw std::invalid_argument("nDimensions must be > 0");
136 }
138 }
139
140 std::size_t GetNDimensions() const { return fDimensionStats.size(); }
141
142 std::uint64_t GetNEntries() const
143 {
145 return fNEntries;
146 }
147 double GetSumW() const
148 {
150 return fSumW;
151 }
152 double GetSumW2() const
153 {
155 return fSumW2;
156 }
157
158 /// Get the statistics object for one dimension.
159 ///
160 /// Throws an exception if the dimension is disabled.
161 ///
162 /// \param[in] dim the dimension index, starting at 0
163 /// \return the statistics object
164 const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const
165 {
167
168 const RDimensionStats &stats = fDimensionStats.at(dim);
169 if (!stats.fEnabled) {
170 throw std::invalid_argument("dimension is disabled");
171 }
172 return stats;
173 }
174
175 /// Disable one dimension of this statistics object.
176 ///
177 /// All future calls to Fill will ignore the corresponding argument. Once disabled, a dimension cannot be reenabled.
178 ///
179 /// \param[in] dim the dimension index, starting at 0
180 void DisableDimension(std::size_t dim) { fDimensionStats.at(dim).fEnabled = false; }
181
182 bool IsEnabled(std::size_t dim) const { return fDimensionStats.at(dim).fEnabled; }
183
184 /// Taint this statistics object.
185 ///
186 /// It can still be filled, but any read access will throw until Clear() is called.
187 void Taint() { fTainted = true; }
188
189 bool IsTainted() const { return fTainted; }
190
191 /// Add all entries from another statistics object.
192 ///
193 /// Throws an exception if the number of dimensions are not identical.
194 ///
195 /// \param[in] other another statistics object
196 void Add(const RHistStats &other)
197 {
198 // NB: this method does *not* call ThrowIfTainted() to allow adding RHist which may contain a tainted statistics
199 // object.
200 if (fDimensionStats.size() != other.fDimensionStats.size()) {
201 throw std::invalid_argument("number of dimensions not identical in Add");
202 }
203 fNEntries += other.fNEntries;
204 fSumW += other.fSumW;
205 fSumW2 += other.fSumW2;
206 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
207 if (fDimensionStats[i].fEnabled != other.fDimensionStats[i].fEnabled) {
208 throw std::invalid_argument("the same dimensions must be enabled to combine statistics with Add");
209 }
210 if (fDimensionStats[i].fEnabled) {
211 fDimensionStats[i].Add(other.fDimensionStats[i]);
212 }
213 }
214 fTainted |= other.fTainted;
215 }
216
217 /// Add all entries from another statistics object using atomic instructions.
218 ///
219 /// Throws an exception if the number of dimensions are not identical.
220 ///
221 /// \param[in] other another statistics object that must not be modified during the operation
223 {
224 // NB: this method does *not* call ThrowIfTainted() to allow adding RHist which may contain a tainted statistics
225 // object.
226 if (fDimensionStats.size() != other.fDimensionStats.size()) {
227 throw std::invalid_argument("number of dimensions not identical in Add");
228 }
232 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
233 if (fDimensionStats[i].fEnabled != other.fDimensionStats[i].fEnabled) {
234 throw std::invalid_argument("the same dimensions must be enabled to combine statistics with Add");
235 }
236 if (fDimensionStats[i].fEnabled) {
237 fDimensionStats[i].AddAtomic(other.fDimensionStats[i]);
238 }
239 }
240 fTainted |= other.fTainted;
241 }
242
243 /// Clear this statistics object.
244 void Clear()
245 {
246 fNEntries = 0;
247 fSumW = 0;
248 fSumW2 = 0;
249 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
250 fDimensionStats[i].Clear();
251 }
252 fTainted = false;
253 }
254
255 /// Compute the number of effective entries.
256 ///
257 /// \f[
258 /// \frac{(\sum w_i)^2}{\sum w_i^2}
259 /// \f]
260 ///
261 /// \return the number of effective entries
263 {
265 if (fSumW2 == 0) {
266 return std::numeric_limits<double>::signaling_NaN();
267 }
268 return fSumW * fSumW / fSumW2;
269 }
270
271 /// Compute the arithmetic mean of unbinned values.
272 ///
273 /// \f[
274 /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i}
275 /// \f]
276 ///
277 /// \param[in] dim the dimension index, starting at 0
278 /// \return the arithmetic mean of unbinned values
279 double ComputeMean(std::size_t dim = 0) const
280 {
281 // First get the statistics, which includes checking the argument.
282 auto &stats = GetDimensionStats(dim);
283 if (fSumW == 0) {
284 return std::numeric_limits<double>::signaling_NaN();
285 }
286 return stats.fSumWX / fSumW;
287 }
288
289 /// Compute the variance of unbinned values.
290 ///
291 /// This function computes the uncorrected sample variance:
292 /// \f[
293 /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2
294 /// \f]
295 /// With some rewriting, this is equivalent to:
296 /// \f[
297 /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2
298 /// \f]
299 ///
300 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
301 /// In other words, the return value is a biased estimation lower than the actual population variance.
302 ///
303 /// \param[in] dim the dimension index, starting at 0
304 /// \return the variance of unbinned values
305 double ComputeVariance(std::size_t dim = 0) const
306 {
307 // First get the statistics, which includes checking the argument.
308 auto &stats = GetDimensionStats(dim);
309 if (fSumW == 0) {
310 return std::numeric_limits<double>::signaling_NaN();
311 }
312 double mean = ComputeMean(dim);
313 return stats.fSumWX2 / fSumW - mean * mean;
314 }
315
316 /// Compute the standard deviation of unbinned values.
317 ///
318 /// This function computes the uncorrected sample standard deviation:
319 /// \f[
320 /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2}
321 /// \f]
322 /// With some rewriting, this is equivalent to:
323 /// \f[
324 /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}}
325 /// \f]
326 ///
327 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
328 /// In other words, the return value is a biased estimation lower than the actual population standard deviation.
329 ///
330 /// \param[in] dim the dimension index, starting at 0
331 /// \return the standard deviation of unbinned values
332 double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
333
334 // clang-format off
335 /// Compute the skewness of unbinned values.
336 ///
337 /// The skewness is the third standardized moment:
338 /// \f[
339 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
340 /// \f]
341 /// With support for weighted filling and after some rewriting, it is computed as:
342 /// \f[
343 /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
344 /// \f]
345 ///
346 /// \param[in] dim the dimension index, starting at 0
347 /// \return the skewness of unbinned values
348 // clang-format on
349 double ComputeSkewness(std::size_t dim = 0) const
350 {
351 // First get the statistics, which includes checking the argument.
352 auto &stats = GetDimensionStats(dim);
353 if (fSumW == 0) {
354 return std::numeric_limits<double>::signaling_NaN();
355 }
356 double mean = ComputeMean(dim);
357 double var = ComputeVariance(dim);
358 if (var == 0) {
359 return std::numeric_limits<double>::signaling_NaN();
360 }
361 double EWX3 = stats.fSumWX3 / fSumW;
362 double EWX2 = stats.fSumWX2 / fSumW;
363 return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
364 }
365
366 // clang-format off
367 /// Compute the (excess) kurtosis of unbinned values.
368 ///
369 /// The kurtosis is based on the fourth standardized moment:
370 /// \f[
371 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
372 /// \f]
373 /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
374 /// \f[
375 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
376 /// \f]
377 ///
378 /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
379 /// \f[
380 /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
381 /// \f]
382 ///
383 /// \param[in] dim the dimension index, starting at 0
384 /// \return the (excess) kurtosis of unbinned values
385 // clang-format on
386 double ComputeKurtosis(std::size_t dim = 0) const
387 {
388 // First get the statistics, which includes checking the argument.
389 auto &stats = GetDimensionStats(dim);
390 if (fSumW == 0) {
391 return std::numeric_limits<double>::signaling_NaN();
392 }
393 double mean = ComputeMean(dim);
394 double var = ComputeVariance(dim);
395 if (var == 0) {
396 return std::numeric_limits<double>::signaling_NaN();
397 }
398 double EWX4 = stats.fSumWX4 / fSumW;
399 double EWX3 = stats.fSumWX3 / fSumW;
400 double EWX2 = stats.fSumWX2 / fSumW;
401 return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
402 }
403
404private:
405 template <std::size_t I, typename... A>
406 void FillImpl(const std::tuple<A...> &args)
407 {
408 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
409 if (fDimensionStats[I].fEnabled) {
410 if constexpr (std::is_convertible_v<ArgumentType, double>) {
411 fDimensionStats[I].Add(std::get<I>(args));
412 } else {
413 throw std::invalid_argument("invalid type of argument in RHistStats");
414 }
415 }
416 if constexpr (I + 1 < sizeof...(A)) {
417 FillImpl<I + 1>(args);
418 }
419 }
420
421 template <std::size_t I, std::size_t N, typename... A>
422 void FillImpl(const std::tuple<A...> &args, double w)
423 {
424 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
425 if (fDimensionStats[I].fEnabled) {
426 if constexpr (std::is_convertible_v<ArgumentType, double>) {
427 fDimensionStats[I].Add(std::get<I>(args), w);
428 } else {
429 // Avoid compiler warning about unused parameter...
430 (void)w;
431 throw std::invalid_argument("invalid type of argument in RHistStats");
432 }
433 }
434 if constexpr (I + 1 < N) {
435 FillImpl<I + 1, N>(args, w);
436 }
437 }
438
439public:
440 /// Fill an entry into this statistics object.
441 ///
442 /// \code
443 /// ROOT::Experimental::RHistStats stats(2);
444 /// auto args = std::make_tuple(8.5, 10.5);
445 /// stats.Fill(args);
446 /// \endcode
447 ///
448 /// Throws an exception if the number of arguments does not match the number of dimensions.
449 ///
450 /// \param[in] args the arguments for each dimension
451 /// \par See also
452 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
453 /// \ref Fill(const std::tuple<A...> &args, RWeight weight) "overload for weighted filling"
454 template <typename... A>
455 void Fill(const std::tuple<A...> &args)
456 {
457 if (sizeof...(A) != fDimensionStats.size()) {
458 throw std::invalid_argument("invalid number of arguments to Fill");
459 }
460 fNEntries++;
461 fSumW++;
462 fSumW2++;
463 FillImpl<0>(args);
464 }
465
466 /// Fill an entry into this statistics object with a weight.
467 ///
468 /// \code
469 /// ROOT::Experimental::RHistStats stats(2);
470 /// auto args = std::make_tuple(8.5, 10.5);
471 /// stats.Fill(args, ROOT::Experimental::RWeight(0.8));
472 /// \endcode
473 ///
474 /// \param[in] args the arguments for each dimension
475 /// \param[in] weight the weight for this entry
476 /// \par See also
477 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
478 /// \ref Fill(const std::tuple<A...> &args) "overload for unweighted filling"
479 template <typename... A>
480 void Fill(const std::tuple<A...> &args, RWeight weight)
481 {
482 if (sizeof...(A) != fDimensionStats.size()) {
483 throw std::invalid_argument("invalid number of arguments to Fill");
484 }
485 fNEntries++;
486 double w = weight.fValue;
487 fSumW += w;
488 fSumW2 += w * w;
489 FillImpl<0, sizeof...(A)>(args, w);
490 }
491
492 /// Fill an entry into this statistics object.
493 ///
494 /// \code
495 /// ROOT::Experimental::RHistStats stats(2);
496 /// stats.Fill(8.5, 10.5);
497 /// \endcode
498 /// For weighted filling, pass an RWeight as the last argument:
499 /// \code
500 /// ROOT::Experimental::RHistStats stats(2);
501 /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8));
502 /// \endcode
503 ///
504 /// Throws an exception if the number of arguments does not match the number of dimensions.
505 ///
506 /// \param[in] args the arguments for each dimension
507 /// \par See also
508 /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple<A...> &args) "for unweighted filling"
509 /// and \ref Fill(const std::tuple<A...> &args, RWeight) "for weighted filling"
510 template <typename... A>
511 void Fill(const A &...args)
512 {
513 static_assert(sizeof...(A) >= 1, "need at least one argument to Fill");
514 if constexpr (sizeof...(A) >= 1) {
515 auto t = std::forward_as_tuple(args...);
516 if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
517 static constexpr std::size_t N = sizeof...(A) - 1;
518 if (N != fDimensionStats.size()) {
519 throw std::invalid_argument("invalid number of arguments to Fill");
520 }
521 fNEntries++;
522 double w = std::get<N>(t).fValue;
523 fSumW += w;
524 fSumW2 += w * w;
525 FillImpl<0, N>(t, w);
526 } else {
527 Fill(t);
528 }
529 }
530 }
531
532 /// Scale the histogram statistics.
533 ///
534 /// \param[in] factor the scale factor
535 void Scale(double factor)
536 {
537 // NB: this method does *not* call ThrowIfTainted() to allow scaling RHist which may contain a tainted statistics
538 // object.
539 fSumW *= factor;
540 fSumW2 *= factor * factor;
541 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
542 if (fDimensionStats[i].fEnabled) {
543 fDimensionStats[i].Scale(factor);
544 }
545 }
546 }
547
548 /// %ROOT Streamer function to throw when trying to store an object of this class.
549 void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); }
550};
551
552} // namespace Experimental
553} // namespace ROOT
554
555#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Histogram statistics of unbinned values.
void Add(const RHistStats &other)
Add all entries from another statistics object.
void Fill(const std::tuple< A... > &args, RWeight weight)
Fill an entry into this statistics object with a weight.
void FillImpl(const std::tuple< A... > &args)
double ComputeSkewness(std::size_t dim=0) const
Compute the skewness of unbinned values.
void Fill(const A &...args)
Fill an entry into this statistics object.
std::vector< RDimensionStats > fDimensionStats
The sums per dimension.
void Clear()
Clear this statistics object.
void Taint()
Taint this statistics object.
void FillImpl(const std::tuple< A... > &args, double w)
void AddAtomic(const RHistStats &other)
Add all entries from another statistics object using atomic instructions.
double fSumW
The sum of weights.
std::size_t GetNDimensions() const
const RDimensionStats & GetDimensionStats(std::size_t dim=0) const
Get the statistics object for one dimension.
double fSumW2
The sum of weights squared.
void Scale(double factor)
Scale the histogram statistics.
void Fill(const std::tuple< A... > &args)
Fill an entry into this statistics object.
double ComputeNEffectiveEntries() const
Compute the number of effective entries.
void DisableDimension(std::size_t dim)
Disable one dimension of this statistics object.
double ComputeVariance(std::size_t dim=0) const
Compute the variance of unbinned values.
void Streamer(TBuffer &)
ROOT Streamer function to throw when trying to store an object of this class.
std::uint64_t GetNEntries() const
double ComputeMean(std::size_t dim=0) const
Compute the arithmetic mean of unbinned values.
double ComputeKurtosis(std::size_t dim=0) const
Compute the (excess) kurtosis of unbinned values.
RHistStats(std::size_t nDimensions)
Construct a statistics object.
bool IsEnabled(std::size_t dim) const
std::uint64_t fNEntries
The number of entries.
bool fTainted
Whether this object is tainted, in which case read access will throw.
double ComputeStdDev(std::size_t dim=0) const
Compute the standard deviation of unbinned values.
Buffer base class used for serializing objects.
Definition TBuffer.h:43
Double_t x[n]
Definition legend1.C:17
#define I(x, y, z)
std::enable_if_t< std::is_integral_v< T > > AtomicAdd(T *ptr, T val)
void AddAtomic(const RDimensionStats &other)
Add another statistics object using atomic instructions.
void Add(const RDimensionStats &other)
A weight for filling histograms.
Definition RWeight.hxx:17