Logo ROOT  
Reference Guide
GoFTest.h
Go to the documentation of this file.
1// @(#)root/mathcore:$Id$
2// Authors: Bartolomeu Rabacal 05/2010
3/**********************************************************************
4 * *
5 * Copyright (c) 2006 , LCG ROOT MathLib Team *
6 * *
7 * *
8 **********************************************************************/
9// Header file for GoFTest
10
11#ifndef ROOT_Math_GoFTest
12#define ROOT_Math_GoFTest
13
15#include "TMath.h"
16
17#include <memory>
18
19/*
20*/
21
22namespace ROOT {
23
24 namespace Fit {
25 class BinData;
26 }
27namespace Math {
28
29///// @defgroup GoFClasses Goodness of Fit Statistical Tests Tools
30
31/*
32 Class for Goodness of Fit tests implementing the Anderson-Darling and Kolmogorov-Smirnov 1- and 2-Samples Goodness of Fit Tests.
33 @ingroup MathCore
34
35 */
36
37
38class GoFTest {
39public:
40
41 enum EDistribution { // H0 distributions for using only with 1-sample tests
42 kUndefined, // Default value for non templated 1-sample test. Set with SetDistribution
43 kUserDefined, // For internal use only within the class's template constructor
47 };
48
49 enum EUserDistribution { // User input distribution option
51 kPDF // Default value
52 };
53
54 enum ETestType { // Goodness of Fit test types for using with the class's unary functions as a shorthand for the in-built methods
55 kAD, // Anderson-Darling Test. Default value
56 kAD2s, // Anderson-Darling 2-Samples Test
57 kKS, // Kolmogorov-Smirnov Test
58 kKS2s // Kolmogorov-Smirnov 2-Samples Test
59 };
60
61 /* Constructor for using only with 2-samples tests */
62 GoFTest(UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2);
63
64 /* Constructor for using only with 1-sample tests with a specified distribution */
65 GoFTest(UInt_t sampleSize, const Double_t* sample, EDistribution dist = kUndefined);
66
67 /* Templated constructor for using only with 1-sample tests with a user specified distribution. NOTE(review): the defaults xmin = 1 > xmax = 0 presumably mean "no range supplied" - confirm in GoFTest.cxx */
68 template<class Dist>
69 GoFTest(UInt_t sampleSize, const Double_t* sample, Dist& dist, EUserDistribution userDist = kPDF,
70 Double_t xmin = 1, Double_t xmax = 0)
71 {
72 Instantiate(sample, sampleSize);
73 SetUserDistribution<Dist>(dist, userDist, xmin, xmax);
74 }
75
76 /* Specialization using IGenFunction interface */
77 GoFTest(UInt_t sampleSize, const Double_t* sample, const IGenFunction& dist, EUserDistribution userDist = kPDF,
78 Double_t xmin = 1, Double_t xmax = 0)
79 {
80 Instantiate(sample, sampleSize);
81 SetUserDistribution(dist, userDist, xmin, xmax);
82 }
83
84 /* Sets the user input distribution function for 1-sample tests. */
85 template<class Dist>
88 SetDistributionFunction(wdist, userDist, xmin, xmax);
89 }
90
91 /* Template specialization to set the user input distribution for 1-sample tests */
94 }
95
96 /* Sets the user input distribution as a probability density function for 1-sample tests */
97 template<class Dist>
98 void SetUserPDF(Dist& pdf, Double_t xmin = 1, Double_t xmax = 0) {
99 SetUserDistribution<Dist>(pdf, kPDF, xmin, xmax);
100 }
101
102 /* Template specialization to set the user input distribution as a probability density function for 1-sample tests */
103 void SetUserPDF(const IGenFunction& pdf, Double_t xmin = 1, Double_t xmax = 0) {
105 }
106
107 /* Sets the user input distribution as a cumulative distribution function for 1-sample tests
108 The CDF must return zero
109 */
110 template<class Dist>
111 void SetUserCDF(Dist& cdf, Double_t xmin = 1, Double_t xmax = 0) {
112 SetUserDistribution<Dist>(cdf, kCDF, xmin, xmax);
113 }
114
115 /* Template specialization to set the user input distribution as a cumulative distribution function for 1-sample tests */
116 void SetUserCDF(const IGenFunction& cdf, Double_t xmin = 1, Double_t xmax = 0) {
118 }
119
120
121 /* Sets the distribution for the predefined distribution types */
123
124
125 virtual ~GoFTest();
126
127/*
128 The Anderson-Darling K-Sample Test algorithm is described and taken from
129 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andeksam.htm
130 and described and taken from
131 (1) Scholz F.W., Stephens M.A. (1987), K-sample Anderson-Darling Tests, Journal of the American Statistical Association, 82, 918–924. (2-samples variant implemented)
132*/ void AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
133 Double_t AndersonDarling2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2"
134
135/*
136 The Anderson-Darling 1-Sample Test algorithm for a specific distribution is described at
137 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andedarl.htm
138 and described and taken from (2)
139 Marsaglia J.C.W., Marsaglia G. (2004), Evaluating the Anderson-Darling Distribution, Journal of Statistical Software, Volume 09, Issue i02.
140 and described and taken from (3)
141 Lewis P.A.W. (1961), The Annals of Mathematical Statistics, Distribution of the Anderson-Darling Statistic, Volume 32, Number 4, 1118-1124.
142*/ void AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const;
143 Double_t AndersonDarlingTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2"
144
145/*
146 The Kolmogorov-Smirnov 2-Samples Test algorithm is described at
147 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ks2samp.htm
148 and described and taken from
149 http://root.cern.ch/root/html/TMath.html#TMath:KolmogorovTest
150*/ void KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
151 Double_t KolmogorovSmirnov2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn"
152
153/*
154 The Kolmogorov-Smirnov 1-Sample Test algorithm for a specific distribution is described at
155 http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/kstest.htm
156 and described and taken from (4)
157 Press W. H., Teukolsky S.A., Vetterling W.T., Flannery B.P. (2007), Numerical Recipes - The Art of Scientific Computing (Third Edition), Cambridge University Press
158*/ void KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const;
159 Double_t KolmogorovSmirnovTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn"
160
161 // The class's unary functions
162 void operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const;
163
164 // Returns default Anderson Darling 1-Sample Test and default p-value; option "t" returns the test statistic value
165 // specific to the test type
166 Double_t operator()(ETestType test = kAD, const Char_t* option = "p") const;
167
168 // Computation of the K-Sample Anderson-Darling Test's p-value as described in (1)
169 // given a normalized test statistic. The first variant described in the paper is used
170 static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2 );
171
172 // Compute the 2-Sample Anderson-Darling test for binned data
173 static void AndersonDarling2SamplesTest(const ROOT::Fit::BinData & data1, const ROOT::Fit::BinData & data2, Double_t& pvalue, Double_t& testStat);
174
175private:
176
177 GoFTest(); // Disallowed default constructor
178 GoFTest(GoFTest& gof); // Disallowed copy constructor
179 GoFTest operator=(GoFTest& gof); // Disallowed assignment operator
180
181 std::unique_ptr<IGenFunction> fCDF;
182
183
185
188
189 std::vector<Double_t> fCombinedSamples;
190
191 std::vector<std::vector<Double_t> > fSamples;
192
194
195 void SetCDF();
197
198 void Instantiate(const Double_t* sample, UInt_t sampleSize); // Called first by the inline 1-sample constructors, before the user distribution is set
199
200
204
205 static Double_t GetSigmaN(const std::vector<UInt_t> & ns, UInt_t N); // Computation of sigma_N as described in (1)
206
207 static Double_t InterpolatePValues(int nsamples,Double_t A2); // Linear interpolation used in GoFTest::PValueAD2Samples. NOTE(review): no PValueAD2Samples is declared in this listing; presumably PValueADKSamples is meant - confirm
208
209
210 Double_t PValueAD1Sample(Double_t A2) const; // Computation of the 1-Sample Anderson-Darling Test's p-value
211
212 void LogSample(); // Applies the logarithm to the sample when the specified distribution to test is LogNormal
213
214 void SetSamples(std::vector<const Double_t*> samples, const std::vector<UInt_t> samplesSizes);
215
216 void SetParameters(); // Sets the estimated mean and standard-deviation from the samples
217}; // end GoFTest class
218
219
220} // Math namespace
221} // ROOT namespace
222#endif
char Char_t
Definition: RtypesCore.h:31
unsigned int UInt_t
Definition: RtypesCore.h:44
double Double_t
Definition: RtypesCore.h:57
#define N
float xmin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
Definition: BinData.h:53
void SetUserDistribution(Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:86
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
Definition: GoFTest.cxx:353
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:208
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
Definition: GoFTest.cxx:264
GoFTest(GoFTest &gof)
std::unique_ptr< IGenFunction > fCDF
Definition: GoFTest.h:181
Bool_t fTestSampleFromH0
Definition: GoFTest.h:193
void SetUserPDF(const IGenFunction &pdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:103
void SetUserPDF(Dist &pdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:98
EDistribution fDist
Definition: GoFTest.h:184
GoFTest operator=(GoFTest &gof)
void SetSamples(std::vector< const Double_t * > samples, const std::vector< UInt_t > samplesSizes)
Definition: GoFTest.cxx:180
void SetUserCDF(const IGenFunction &cdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:116
Double_t fSigma
Definition: GoFTest.h:187
std::vector< Double_t > fCombinedSamples
Definition: GoFTest.h:189
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:921
GoFTest(UInt_t sampleSize, const Double_t *sample, const IGenFunction &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:77
void SetDistribution(EDistribution dist)
Definition: GoFTest.cxx:123
Double_t LogNormalCDF(Double_t x) const
void Instantiate(const Double_t *sample, UInt_t sampleSize)
Definition: GoFTest.cxx:276
virtual ~GoFTest()
Definition: GoFTest.cxx:178
Double_t GaussianCDF(Double_t x) const
Definition: GoFTest.cxx:294
GoFTest(UInt_t sampleSize, const Double_t *sample, Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:69
void SetUserDistribution(const IGenFunction &dist, GoFTest::EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:92
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:646
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
Definition: GoFTest.cxx:311
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:896
std::vector< std::vector< Double_t > > fSamples
Definition: GoFTest.h:191
Double_t PValueAD1Sample(Double_t A2) const
Definition: GoFTest.cxx:483
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:862
Double_t ExponentialCDF(Double_t x) const
Definition: GoFTest.cxx:298
static Double_t InterpolatePValues(int nsamples, Double_t A2)
Double_t fMean
Definition: GoFTest.h:186
void SetUserCDF(Dist &cdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:111
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition: IFunction.h:135
Template class to wrap any C++ callable object which takes one argument i.e.
Double_t x[n]
Definition: legend1.C:17
TFitResultPtr Fit(FitObject *h1, TF1 *f1, Foption_t &option, const ROOT::Math::MinimizerOptions &moption, const char *goption, ROOT::Fit::DataRange &range)
Definition: HFitImpl.cxx:134
Namespace for new Math classes and functions.
double Dist(void *xp, void *yp)
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
Definition: StringConv.hxx:21
static constexpr double ns
Definition: test.py:1