ROOT  6.06/09
Reference Guide
GoFTest.h
Go to the documentation of this file.
1 // @(#)root/mathcore:$Id$
2 // Authors: Bartolomeu Rabacal 05/2010
3 /**********************************************************************
4  * *
5  * Copyright (c) 2006 , LCG ROOT MathLib Team *
6  * *
7  * *
8  **********************************************************************/
9 // Header file for GoFTest
10 
11 #ifndef ROOT_Math_GoFTest
12 #define ROOT_Math_GoFTest
13 
14 #include <memory>
15 
16 #ifndef ROOT_Math_WrappedFunction
17 #include "Math/WrappedFunction.h"
18 #endif
19 #ifndef ROOT_TMath
20 #include "TMath.h"
21 #endif
22 
23 /*
24 */
25 
26 namespace ROOT {
27 
28  namespace Fit {
29  class BinData;
30  }
31 namespace Math {
32 
33 ///// @defgroup GoFClasses Goodness of Fit Statistical Tests Tools
34 
35 /*
36  Class for Goodness of Fit tests implementing the Anderson-Darling and Kolmogorov-Smirnov 1- and 2-Samples Goodness of Fit Tests.
37  @ingroup MathCore
38 
39  */
40 
41 
42 class GoFTest {
43 public:
44 
45  enum EDistribution { // H0 distributions for using only with 1-sample tests
46  kUndefined, // Default value for non templated 1-sample test. Set with SetDistribution
47  kUserDefined, // For internal use only within the class's template constructor
51  };
52 
53  enum EUserDistribution { // User input distribution option
55  kPDF // Default value
56  };
57 
58  enum ETestType { // Goodness of Fit test types for using with the class's unary functions as a shorthand for the in-built methods
59  kAD, // Anderson-Darling Test. Default value
60  kAD2s, // Anderson-Darling 2-Samples Test
61  kKS, // Kolmogorov-Smirnov Test
62  kKS2s // Kolmogorov-Smirnov 2-Samples Test
63  };
64 
65  /* Constructor for using only with 2-samples tests */
66  GoFTest(UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2);
67 
68  /* Constructor for using only with 1-sample tests with a specified distribution */
70 
71  /* Templated constructor for using only with 1-sample tests with a user specified distribution */
72  template<class Dist>
74  Double_t xmin = 1, Double_t xmax = 0)
75  {
76  Instantiate(sample, sampleSize);
77  SetUserDistribution<Dist>(dist, userDist, xmin, xmax);
78  }
79 
80  /* Specialization using IGenFunction interface */
82  Double_t xmin = 1, Double_t xmax = 0)
83  {
84  Instantiate(sample, sampleSize);
85  SetUserDistribution(dist, userDist, xmin, xmax);
86  }
87 
88  /* Sets the user input distribution function for 1-sample tests. */
89  template<class Dist>
91  WrappedFunction<Dist&> wdist(dist);
92  SetDistributionFunction(wdist, userDist, xmin, xmax);
93  }
94 
95  /* Template specialization to set the user input distribution for 1-sample tests */
97  SetDistributionFunction(dist, userDist, xmin, xmax);
98  }
99 
100  /* Sets the user input distribution as a probability density function for 1-sample tests */
101  template<class Dist>
102  void SetUserPDF(Dist& pdf, Double_t xmin = 1, Double_t xmax = 0) {
103  SetUserDistribution<Dist>(pdf, kPDF, xmin, xmax);
104  }
105 
106  /* Template specialization to set the user input distribution as a probability density function for 1-sample tests */
107  void SetUserPDF(const IGenFunction& pdf, Double_t xmin = 1, Double_t xmax = 0) {
109  }
110 
111  /* Sets the user input distribution as a cumulative distribution function for 1-sample tests
112  The CDF must return zero
113  */
114  template<class Dist>
116  SetUserDistribution<Dist>(cdf, kCDF, xmin, xmax);
117  }
118 
119  /* Template specialization to set the user input distribution as a cumulative distribution function for 1-sample tests */
122  }
123 
124 
125  /* Sets the distribution for the predefined distribution types */
127 
128 
129  virtual ~GoFTest();
130 
131 /*
132  The Anderson-Darling K-Sample Test algorithm is described and taken from
133  http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andeksam.htm
134  and described and taken from
135  (1) Scholz F.W., Stephens M.A. (1987), K-sample Anderson-Darling Tests, Journal of the American Statistical Association, 82, 918–924. (2-samples variant implemented)
136 */ void AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
137  Double_t AndersonDarling2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2"
138 
139 /*
140  The Anderson-Darling 1-Sample Test algorithm for a specific distribution is described at
141  http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andedarl.htm
142  and described and taken from (2)
143  Marsaglia J.C.W., Marsaglia G. (2004), Evaluating the Anderson-Darling Distribution, Journal of Statistical Software, Volume 09, Issue i02.
144  and described and taken from (3)
145  Lewis P.A.W. (1961), The Annals of Mathematical Statistics, Distribution of the Anderson-Darling Statistic, Volume 32, Number 4, 1118-1124.
146 */ void AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const;
147  Double_t AndersonDarlingTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2"
148 
149 /*
150  The Kolmogorov-Smirnov 2-Samples Test algorithm is described at
151  http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ks2samp.htm
152  and described and taken from
153  http://root.cern.ch/root/html/TMath.html#TMath:KolmogorovTest
154 */ void KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
155  Double_t KolmogorovSmirnov2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn"
156 
157 /*
158  The Kolmogorov-Smirnov 1-Sample Test algorithm for a specific distribution is described at
159  http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/kstest.htm
160  and described and taken from (4)
161  Press W. H., Teukolsky S.A., Vetterling W.T., Flannery B.P. (2007), Numerical Recipes - The Art of Scientific Computing (Third Edition), Cambridge University Press
162 */ void KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const;
163  Double_t KolmogorovSmirnovTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn"
164 
165  // The class's unary functions
166  void operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const;
167 
168  // Returns default Anderson Darling 1-Sample Test and default p-value; option "t" returns the test statistic value
169  // specific to the test type
170  Double_t operator()(ETestType test = kAD, const Char_t* option = "p") const;
171 
172  // Computation of the K-Sample Anderson-Darling Test's p-value as described in (1)
173  // given a normalized test statistic. The first variant described in the paper is used
174  static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2 );
175 
176  // Compute the 2-Sample Anderson Darling test for binned data
177  static void AndersonDarling2SamplesTest(const ROOT::Fit::BinData & data1, const ROOT::Fit::BinData & data2, Double_t& pvalue, Double_t& testStat);
178 
179 private:
180 
181  GoFTest(); // Disallowed default constructor
182  GoFTest(GoFTest& gof); // Disallowed copy constructor
183  GoFTest operator=(GoFTest& gof); // Disallowed assign operator
184 
185  std::auto_ptr<IGenFunction> fCDF;
186 
187 
189 
192 
193  std::vector<Double_t> fCombinedSamples;
194 
195  std::vector<std::vector<Double_t> > fSamples;
196 
198 
199  void SetCDF();
201 
202  void Instantiate(const Double_t* sample, UInt_t sampleSize);
203 
204 
208 
209  static Double_t GetSigmaN(const std::vector<UInt_t> & ns, UInt_t N); // Computation of sigma_N as described in (1)
210 
211  static Double_t InterpolatePValues(int nsamples,Double_t A2); // Linear interpolation used in GoFTest::PValueAD2Samples
212 
213 
214  Double_t PValueAD1Sample(Double_t A2) const; // Computation of the 1-Sample Anderson-Darling Test's p-value
215 
216  void LogSample(); // Applies the logarithm to the sample when the specified distribution to test is LogNormal
217 
218  void SetSamples(std::vector<const Double_t*> samples, const std::vector<UInt_t> samplesSizes);
219 
220  void SetParameters(); // Sets the estimated mean and standard-deviation from the samples
221 }; // end GoFTest class
222 
223 
224 } // Math namespace
225 } // ROOT namespace
226 #endif
GoFTest operator=(GoFTest &gof)
void SetDistribution(EDistribution dist)
Definition: GoFTest.cxx:120
Double_t PValueAD1Sample(Double_t A2) const
Definition: GoFTest.cxx:480
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
Bool_t fTestSampleFromH0
Definition: GoFTest.h:197
Double_t fMean
Definition: GoFTest.h:190
float xmin
Definition: THbookFile.cxx:93
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition: IFunction.h:133
Namespace for new ROOT classes and functions.
Definition: ROOT.py:1
std::vector< Double_t > fCombinedSamples
Definition: GoFTest.h:193
static Double_t InterpolatePValues(int nsamples, Double_t A2)
std::auto_ptr< IGenFunction > fCDF
Definition: GoFTest.h:185
#define N
void SetUserCDF(Dist &cdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:115
bool Bool_t
Definition: RtypesCore.h:59
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:916
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:207
Template class to wrap any C++ callable object which takes one argument i.e.
int sampleSize
Definition: unuranDistr.cxx:34
Double_t x[n]
Definition: legend1.C:17
void SetUserDistribution(const IGenFunction &dist, GoFTest::EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:96
double cdf(double *x, double *p)
Definition: unuranDistr.cxx:44
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
Definition: GoFTest.cxx:350
EDistribution fDist
Definition: GoFTest.h:188
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
Definition: GoFTest.cxx:308
double Dist(void *xp, void *yp)
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:643
Double_t GaussianCDF(Double_t x) const
Definition: GoFTest.cxx:292
unsigned int UInt_t
Definition: RtypesCore.h:42
virtual ~GoFTest()
Definition: GoFTest.cxx:177
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
Definition: BinData.h:61
Double_t fSigma
Definition: GoFTest.h:191
float xmax
Definition: THbookFile.cxx:93
Double_t LogNormalCDF(Double_t x) const
std::vector< std::vector< Double_t > > fSamples
Definition: GoFTest.h:195
double Double_t
Definition: RtypesCore.h:55
TFitResultPtr Fit(FitObject *h1, TF1 *f1, Foption_t &option, const ROOT::Math::MinimizerOptions &moption, const char *goption, ROOT::Fit::DataRange &range)
Definition: HFitImpl.cxx:132
void Instantiate(const Double_t *sample, UInt_t sampleSize)
Definition: GoFTest.cxx:274
Namespace for new Math classes and functions.
GoFTest(UInt_t sampleSize, const Double_t *sample, const IGenFunction &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:81
char Char_t
Definition: RtypesCore.h:29
void SetUserPDF(Dist &pdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:102
void SetUserCDF(const IGenFunction &cdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:120
void SetSamples(std::vector< const Double_t * > samples, const std::vector< UInt_t > samplesSizes)
Definition: GoFTest.cxx:179
GoFTest(UInt_t sampleSize, const Double_t *sample, Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:73
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
Definition: GoFTest.cxx:262
void SetUserPDF(const IGenFunction &pdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:107
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:857
void SetUserDistribution(Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:90
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:891
Double_t ExponentialCDF(Double_t x) const
Definition: GoFTest.cxx:296