Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RHistStats.hxx
Go to the documentation of this file.
1/// \file
2/// \warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
3/// Feedback is welcome!
4
5#ifndef ROOT_RHistStats
6#define ROOT_RHistStats
7
8#include "RHistUtils.hxx"
9#include "RWeight.hxx"
10
11#include <cassert>
12#include <cmath>
13#include <cstddef>
14#include <cstdint>
15#include <limits>
16#include <stdexcept>
17#include <tuple>
18#include <type_traits>
19#include <vector>
20
21class TBuffer;
22
23namespace ROOT {
24namespace Experimental {
25
26// forward declaration for friend declaration
27template <typename T>
28class RHist;
29
30/**
31Histogram statistics of unbinned values.
32
33Every call to \ref Fill(const A &... args) "Fill" updates sums to compute the number of effective entries as well as the
34arithmetic mean and other statistical quantities per dimension:
35\code
36ROOT::Experimental::RHistStats stats(1);
37stats.Fill(8.5);
38stats.Fill(1.5);
39// stats.ComputeMean() will return 5.0
40\endcode
41
42\warning This is part of the %ROOT 7 prototype! It will change without notice. It might trigger earthquakes.
43Feedback is welcome!
44*/
46 template <typename T>
47 friend class RHist;
48
49public:
50 /// Statistics for one dimension.
52 bool fEnabled = true;
53 double fSumWX = 0.0;
54 double fSumWX2 = 0.0;
55 double fSumWX3 = 0.0;
56 double fSumWX4 = 0.0;
57
58 void Add(double x)
59 {
61 fSumWX += x;
62 fSumWX2 += x * x;
63 fSumWX3 += x * x * x;
64 fSumWX4 += x * x * x * x;
65 }
66
67 void Add(double x, double w)
68 {
70 fSumWX += w * x;
71 fSumWX2 += w * x * x;
72 fSumWX3 += w * x * x * x;
73 fSumWX4 += w * x * x * x * x;
74 }
75
77 {
79 fSumWX += other.fSumWX;
80 fSumWX2 += other.fSumWX2;
81 fSumWX3 += other.fSumWX3;
82 fSumWX4 += other.fSumWX4;
83 }
84
85 /// Add another statistics object using atomic instructions.
86 ///
87 /// \param[in] other another statistics object that must not be modified during the operation
96
97 void Clear()
98 {
99 fSumWX = 0.0;
100 fSumWX2 = 0.0;
101 fSumWX3 = 0.0;
102 fSumWX4 = 0.0;
103 }
104
105 void Scale(double factor)
106 {
108 fSumWX *= factor;
109 fSumWX2 *= factor;
110 fSumWX3 *= factor;
111 fSumWX4 *= factor;
112 }
113 };
114
115private:
116 /// The number of entries
117 std::uint64_t fNEntries = 0;
118 /// The sum of weights
119 double fSumW = 0.0;
120 /// The sum of weights squared
121 double fSumW2 = 0.0;
122 /// The sums per dimension
123 std::vector<RDimensionStats> fDimensionStats;
124 /// Whether this object is tainted, in which case read access will throw. This is used if a user modifies bin
125 /// contents explicitly or slices histograms without preserving all entries, for example.
126 bool fTainted = false;
127
128 void ThrowIfTainted() const
129 {
130 if (fTainted) {
131 throw std::logic_error("statistics are tainted, for example after setting bin contents or slicing");
132 }
133 }
134
135public:
136 /// Construct a statistics object.
137 ///
138 /// \param[in] nDimensions the number of dimensions, must be > 0
139 explicit RHistStats(std::size_t nDimensions)
140 {
141 if (nDimensions == 0) {
142 throw std::invalid_argument("nDimensions must be > 0");
143 }
145 }
146
147 /// \name Accessors
148 /// \{
149
150 std::size_t GetNDimensions() const { return fDimensionStats.size(); }
151
152 std::uint64_t GetNEntries() const
153 {
155 return fNEntries;
156 }
157 double GetSumW() const
158 {
160 return fSumW;
161 }
162 double GetSumW2() const
163 {
165 return fSumW2;
166 }
167
168 /// Get the statistics object for one dimension.
169 ///
170 /// Throws an exception if the dimension is disabled.
171 ///
172 /// \param[in] dim the dimension index, starting at 0
173 /// \return the statistics object
174 const RDimensionStats &GetDimensionStats(std::size_t dim = 0) const
175 {
177
178 const RDimensionStats &stats = fDimensionStats.at(dim);
179 if (!stats.fEnabled) {
180 throw std::invalid_argument("dimension is disabled");
181 }
182 return stats;
183 }
184
185 /// Disable one dimension of this statistics object.
186 ///
187 /// All future calls to Fill will ignore the corresponding argument. Once disabled, a dimension cannot be reenabled.
188 ///
189 /// \param[in] dim the dimension index, starting at 0
190 void DisableDimension(std::size_t dim) { fDimensionStats.at(dim).fEnabled = false; }
191
192 bool IsEnabled(std::size_t dim) const { return fDimensionStats.at(dim).fEnabled; }
193
194 /// Taint this statistics object.
195 ///
196 /// It can still be filled, but any read access will throw until Clear() is called.
197 void Taint() { fTainted = true; }
198
199 bool IsTainted() const { return fTainted; }
200
201 /// \}
202 /// \name Computations
203 /// \{
204
205 /// Compute the number of effective entries.
206 ///
207 /// \f[
208 /// \frac{(\sum w_i)^2}{\sum w_i^2}
209 /// \f]
210 ///
211 /// \return the number of effective entries
213 {
215 if (fSumW2 == 0) {
216 return std::numeric_limits<double>::signaling_NaN();
217 }
218 return fSumW * fSumW / fSumW2;
219 }
220
221 /// Compute the arithmetic mean of unbinned values.
222 ///
223 /// \f[
224 /// \mu = \frac{\sum w_i \cdot x_i}{\sum w_i}
225 /// \f]
226 ///
227 /// \param[in] dim the dimension index, starting at 0
228 /// \return the arithmetic mean of unbinned values
229 double ComputeMean(std::size_t dim = 0) const
230 {
231 // First get the statistics, which includes checking the argument.
232 auto &stats = GetDimensionStats(dim);
233 if (fSumW == 0) {
234 return std::numeric_limits<double>::signaling_NaN();
235 }
236 return stats.fSumWX / fSumW;
237 }
238
239 /// Compute the variance of unbinned values.
240 ///
241 /// This function computes the uncorrected sample variance:
242 /// \f[
243 /// \sigma^2 = \frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2
244 /// \f]
245 /// With some rewriting, this is equivalent to:
246 /// \f[
247 /// \sigma^2 = \frac{\sum w_i \cdot x_i^2}{\sum w_i} - \mu^2
248 /// \f]
249 ///
250 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
251 /// In other words, the return value is a biased estimation lower than the actual population variance.
252 ///
253 /// \param[in] dim the dimension index, starting at 0
254 /// \return the variance of unbinned values
255 double ComputeVariance(std::size_t dim = 0) const
256 {
257 // First get the statistics, which includes checking the argument.
258 auto &stats = GetDimensionStats(dim);
259 if (fSumW == 0) {
260 return std::numeric_limits<double>::signaling_NaN();
261 }
262 double mean = ComputeMean(dim);
263 return stats.fSumWX2 / fSumW - mean * mean;
264 }
265
266 /// Compute the standard deviation of unbinned values.
267 ///
268 /// This function computes the uncorrected sample standard deviation:
269 /// \f[
270 /// \sigma = \sqrt{\frac{1}{\sum w_i} \sum(w_i \cdot x_i - \mu)^2}
271 /// \f]
272 /// With some rewriting, this is equivalent to:
273 /// \f[
274 /// \sigma = \sqrt{\frac{\sum w_i \cdot x_i^2}{\sum w_i} - \frac{(\sum w_i \cdot x_i)^2}{(\sum w_i)^2}}
275 /// \f]
276 ///
277 /// This function does not include Bessel's correction needed for an unbiased estimator of population variance.
278 /// In other words, the return value is a biased estimation lower than the actual population standard deviation.
279 ///
280 /// \param[in] dim the dimension index, starting at 0
281 /// \return the standard deviation of unbinned values
282 double ComputeStdDev(std::size_t dim = 0) const { return std::sqrt(ComputeVariance(dim)); }
283
284 // clang-format off
285 /// Compute the skewness of unbinned values.
286 ///
287 /// The skewness is the third standardized moment:
288 /// \f[
289 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^3\right]
290 /// \f]
291 /// With support for weighted filling and after some rewriting, it is computed as:
292 /// \f[
293 /// \frac{\frac{\sum w_i \cdot x_i^3}{\sum w_i} - 3 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu + 2 \cdot \mu^3}{\sigma^3}
294 /// \f]
295 ///
296 /// \param[in] dim the dimension index, starting at 0
297 /// \return the skewness of unbinned values
298 // clang-format on
299 double ComputeSkewness(std::size_t dim = 0) const
300 {
301 // First get the statistics, which includes checking the argument.
302 auto &stats = GetDimensionStats(dim);
303 if (fSumW == 0) {
304 return std::numeric_limits<double>::signaling_NaN();
305 }
306 double mean = ComputeMean(dim);
307 double var = ComputeVariance(dim);
308 if (var == 0) {
309 return std::numeric_limits<double>::signaling_NaN();
310 }
311 double EWX3 = stats.fSumWX3 / fSumW;
312 double EWX2 = stats.fSumWX2 / fSumW;
313 return (EWX3 - 3 * EWX2 * mean + 2 * mean * mean * mean) / std::pow(var, 1.5);
314 }
315
316 // clang-format off
317 /// Compute the (excess) kurtosis of unbinned values.
318 ///
319 /// The kurtosis is based on the fourth standardized moment:
320 /// \f[
321 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right]
322 /// \f]
323 /// The excess kurtosis subtracts 3 from the standardized moment to have a value of 0 for a normal distribution:
324 /// \f[
325 /// E\left[\left(\frac{X - \mu}{\sigma}\right)^4\right] - 3
326 /// \f]
327 ///
328 /// With support for weighted filling and after some rewriting, the (excess kurtosis) is computed as:
329 /// \f[
330 /// \frac{\frac{\sum w_i \cdot x_i^4}{\sum w_i} - 4 \cdot \frac{\sum w_i \cdot x_i^3}{\sum w_i} \cdot \mu + 6 \cdot \frac{\sum w_i \cdot x_i^2}{\sum w_i} \cdot \mu^2 - 3 \cdot \mu^4}{\sigma^4} - 3
331 /// \f]
332 ///
333 /// \param[in] dim the dimension index, starting at 0
334 /// \return the (excess) kurtosis of unbinned values
335 // clang-format on
336 double ComputeKurtosis(std::size_t dim = 0) const
337 {
338 // First get the statistics, which includes checking the argument.
339 auto &stats = GetDimensionStats(dim);
340 if (fSumW == 0) {
341 return std::numeric_limits<double>::signaling_NaN();
342 }
343 double mean = ComputeMean(dim);
344 double var = ComputeVariance(dim);
345 if (var == 0) {
346 return std::numeric_limits<double>::signaling_NaN();
347 }
348 double EWX4 = stats.fSumWX4 / fSumW;
349 double EWX3 = stats.fSumWX3 / fSumW;
350 double EWX2 = stats.fSumWX2 / fSumW;
351 return (EWX4 - 4 * EWX3 * mean + 6 * EWX2 * mean * mean - 3 * mean * mean * mean * mean) / (var * var) - 3;
352 }
353
354 /// \}
355
356private:
357 template <std::size_t I, std::size_t N, typename... A>
358 void CheckArguments(const std::tuple<A...> &args) const
359 {
360 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
361 if (fDimensionStats[I].fEnabled) {
362 if constexpr (!std::is_convertible_v<ArgumentType, double>) {
363 throw std::invalid_argument("invalid type of argument in RHistStats");
364 }
365 }
366 if constexpr (I + 1 < N) {
368 }
369 }
370
371 template <std::size_t I, typename... A>
372 void FillImpl(const std::tuple<A...> &args)
373 {
374 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
375 if (fDimensionStats[I].fEnabled) {
376 if constexpr (std::is_convertible_v<ArgumentType, double>) {
377 fDimensionStats[I].Add(std::get<I>(args));
378 } else {
379 // Should be checked in CheckArguments above!
380 assert(0); // GCOVR_EXCL_LINE
381 }
382 }
383 if constexpr (I + 1 < sizeof...(A)) {
384 FillImpl<I + 1>(args);
385 }
386 }
387
388 template <std::size_t I, std::size_t N, typename... A>
389 void FillImpl(const std::tuple<A...> &args, double w)
390 {
391 using ArgumentType = std::tuple_element_t<I, std::tuple<A...>>;
392 if (fDimensionStats[I].fEnabled) {
393 if constexpr (std::is_convertible_v<ArgumentType, double>) {
394 fDimensionStats[I].Add(std::get<I>(args), w);
395 } else {
396 // Avoid compiler warning about unused parameter...
397 (void)w;
398 // Should be checked in CheckArguments above!
399 assert(0); // GCOVR_EXCL_LINE
400 }
401 }
402 if constexpr (I + 1 < N) {
403 FillImpl<I + 1, N>(args, w);
404 }
405 }
406
407public:
408 /// \name Filling
409 /// \{
410
411 /// Fill an entry into this statistics object.
412 ///
413 /// \code
414 /// ROOT::Experimental::RHistStats stats(2);
415 /// auto args = std::make_tuple(8.5, 10.5);
416 /// stats.Fill(args);
417 /// \endcode
418 ///
419 /// Throws an exception if the number of arguments does not match the number of dimensions.
420 ///
421 /// \param[in] args the arguments for each dimension
422 /// \par See also
423 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
424 /// \ref Fill(const std::tuple<A...> &args, RWeight weight) "overload for weighted filling"
425 template <typename... A>
426 void Fill(const std::tuple<A...> &args)
427 {
428 if (sizeof...(A) != fDimensionStats.size()) {
429 throw std::invalid_argument("invalid number of arguments to Fill");
430 }
431 // For exception safety, first check all arguments before modifying the object.
432 CheckArguments<0, sizeof...(A)>(args);
433
434 fNEntries++;
435 fSumW++;
436 fSumW2++;
437 FillImpl<0>(args);
438 }
439
440 /// Fill an entry into this statistics object with a weight.
441 ///
442 /// \code
443 /// ROOT::Experimental::RHistStats stats(2);
444 /// auto args = std::make_tuple(8.5, 10.5);
445 /// stats.Fill(args, ROOT::Experimental::RWeight(0.8));
446 /// \endcode
447 ///
448 /// \param[in] args the arguments for each dimension
449 /// \param[in] weight the weight for this entry
450 /// \par See also
451 /// the \ref Fill(const A &... args) "variadic function template overload" accepting arguments directly and the
452 /// \ref Fill(const std::tuple<A...> &args) "overload for unweighted filling"
453 template <typename... A>
454 void Fill(const std::tuple<A...> &args, RWeight weight)
455 {
456 if (sizeof...(A) != fDimensionStats.size()) {
457 throw std::invalid_argument("invalid number of arguments to Fill");
458 }
459 // For exception safety, first check all arguments before modifying the object.
460 CheckArguments<0, sizeof...(A)>(args);
461
462 fNEntries++;
463 double w = weight.fValue;
464 fSumW += w;
465 fSumW2 += w * w;
466 FillImpl<0, sizeof...(A)>(args, w);
467 }
468
469 /// Fill an entry into this statistics object.
470 ///
471 /// \code
472 /// ROOT::Experimental::RHistStats stats(2);
473 /// stats.Fill(8.5, 10.5);
474 /// \endcode
475 /// For weighted filling, pass an RWeight as the last argument:
476 /// \code
477 /// ROOT::Experimental::RHistStats stats(2);
478 /// stats.Fill(8.5, 10.5, ROOT::Experimental::RWeight(0.8));
479 /// \endcode
480 ///
481 /// Throws an exception if the number of arguments does not match the number of dimensions.
482 ///
483 /// \param[in] args the arguments for each dimension
484 /// \par See also
485 /// the function overloads accepting `std::tuple` \ref Fill(const std::tuple<A...> &args) "for unweighted filling"
486 /// and \ref Fill(const std::tuple<A...> &args, RWeight) "for weighted filling"
487 template <typename... A>
488 void Fill(const A &...args)
489 {
490 static_assert(sizeof...(A) >= 1, "need at least one argument to Fill");
491 if constexpr (sizeof...(A) >= 1) {
492 auto t = std::forward_as_tuple(args...);
493 if constexpr (std::is_same_v<typename Internal::LastType<A...>::type, RWeight>) {
494 static constexpr std::size_t N = sizeof...(A) - 1;
495 if (N != fDimensionStats.size()) {
496 throw std::invalid_argument("invalid number of arguments to Fill");
497 }
498 // For exception safety, first check all arguments before modifying the object.
500
501 fNEntries++;
502 double w = std::get<N>(t).fValue;
503 fSumW += w;
504 fSumW2 += w * w;
505 FillImpl<0, N>(t, w);
506 } else {
507 Fill(t);
508 }
509 }
510 }
511
512 /// \}
513 /// \name Operations
514 /// \{
515
516 /// Add all entries from another statistics object.
517 ///
518 /// Throws an exception if the number of dimensions are not identical.
519 ///
520 /// \param[in] other another statistics object
521 void Add(const RHistStats &other)
522 {
523 // NB: this method does *not* call ThrowIfTainted() to allow adding RHist which may contain a tainted statistics
524 // object.
525 if (fDimensionStats.size() != other.fDimensionStats.size()) {
526 throw std::invalid_argument("number of dimensions not identical in Add");
527 }
528 // For exception safety, first check all dimensions before modifying the object.
529 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
530 if (fDimensionStats[i].fEnabled != other.fDimensionStats[i].fEnabled) {
531 throw std::invalid_argument("the same dimensions must be enabled to combine statistics with Add");
532 }
533 }
534
535 fNEntries += other.fNEntries;
536 fSumW += other.fSumW;
537 fSumW2 += other.fSumW2;
538 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
539 if (fDimensionStats[i].fEnabled) {
540 fDimensionStats[i].Add(other.fDimensionStats[i]);
541 }
542 }
543 fTainted |= other.fTainted;
544 }
545
546 /// Add all entries from another statistics object using atomic instructions.
547 ///
548 /// Throws an exception if the number of dimensions are not identical.
549 ///
550 /// \param[in] other another statistics object that must not be modified during the operation
552 {
553 // NB: this method does *not* call ThrowIfTainted() to allow adding RHist which may contain a tainted statistics
554 // object.
555 if (fDimensionStats.size() != other.fDimensionStats.size()) {
556 throw std::invalid_argument("number of dimensions not identical in AddAtomic");
557 }
558 // For exception safety, first check all dimensions before modifying the object.
559 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
560 if (fDimensionStats[i].fEnabled != other.fDimensionStats[i].fEnabled) {
561 throw std::invalid_argument("the same dimensions must be enabled to combine statistics with AddAtomic");
562 }
563 }
564
568 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
569 if (fDimensionStats[i].fEnabled) {
570 fDimensionStats[i].AddAtomic(other.fDimensionStats[i]);
571 }
572 }
573 fTainted |= other.fTainted;
574 }
575
576 /// Clear this statistics object.
577 void Clear()
578 {
579 fNEntries = 0;
580 fSumW = 0;
581 fSumW2 = 0;
582 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
583 fDimensionStats[i].Clear();
584 }
585 fTainted = false;
586 }
587
588 /// Scale the histogram statistics.
589 ///
590 /// \param[in] factor the scale factor
591 void Scale(double factor)
592 {
593 // NB: this method does *not* call ThrowIfTainted() to allow scaling RHist which may contain a tainted statistics
594 // object.
595 fSumW *= factor;
596 fSumW2 *= factor * factor;
597 for (std::size_t i = 0; i < fDimensionStats.size(); i++) {
598 if (fDimensionStats[i].fEnabled) {
599 fDimensionStats[i].Scale(factor);
600 }
601 }
602 }
603
604 /// \}
605
606 /// %ROOT Streamer function to throw when trying to store an object of this class.
607 void Streamer(TBuffer &) { throw std::runtime_error("unable to store RHistStats"); }
608};
609
610} // namespace Experimental
611} // namespace ROOT
612
613#endif
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define N
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Histogram statistics of unbinned values.
void Add(const RHistStats &other)
Add all entries from another statistics object.
void Fill(const std::tuple< A... > &args, RWeight weight)
Fill an entry into this statistics object with a weight.
void FillImpl(const std::tuple< A... > &args)
double ComputeSkewness(std::size_t dim=0) const
Compute the skewness of unbinned values.
void Fill(const A &...args)
Fill an entry into this statistics object.
std::vector< RDimensionStats > fDimensionStats
The sums per dimension.
void Clear()
Clear this statistics object.
void Taint()
Taint this statistics object.
void FillImpl(const std::tuple< A... > &args, double w)
void AddAtomic(const RHistStats &other)
Add all entries from another statistics object using atomic instructions.
double fSumW
The sum of weights.
std::size_t GetNDimensions() const
const RDimensionStats & GetDimensionStats(std::size_t dim=0) const
Get the statistics object for one dimension.
double fSumW2
The sum of weights squared.
void Scale(double factor)
Scale the histogram statistics.
void Fill(const std::tuple< A... > &args)
Fill an entry into this statistics object.
double ComputeNEffectiveEntries() const
Compute the number of effective entries.
void DisableDimension(std::size_t dim)
Disable one dimension of this statistics object.
void CheckArguments(const std::tuple< A... > &args) const
double ComputeVariance(std::size_t dim=0) const
Compute the variance of unbinned values.
void Streamer(TBuffer &)
ROOT Streamer function to throw when trying to store an object of this class.
std::uint64_t GetNEntries() const
double ComputeMean(std::size_t dim=0) const
Compute the arithmetic mean of unbinned values.
double ComputeKurtosis(std::size_t dim=0) const
Compute the (excess) kurtosis of unbinned values.
RHistStats(std::size_t nDimensions)
Construct a statistics object.
bool IsEnabled(std::size_t dim) const
std::uint64_t fNEntries
The number of entries.
bool fTainted
Whether this object is tainted, in which case read access will throw.
double ComputeStdDev(std::size_t dim=0) const
Compute the standard deviation of unbinned values.
A histogram for aggregation of data along multiple dimensions.
Definition RHist.hxx:65
Buffer base class used for serializing objects.
Definition TBuffer.h:43
Double_t x[n]
Definition legend1.C:17
#define I(x, y, z)
std::enable_if_t< std::is_integral_v< T > > AtomicAdd(T *ptr, T val)
void AddAtomic(const RDimensionStats &other)
Add another statistics object using atomic instructions.
void Add(const RDimensionStats &other)
A weight for filling histograms.
Definition RWeight.hxx:17