Logo ROOT   6.08/07
Reference Guide
GoFTest.h
Go to the documentation of this file.
1 // @(#)root/mathcore:$Id$
2 // Authors: Bartolomeu Rabacal 05/2010
3 /**********************************************************************
4  * *
5  * Copyright (c) 2006 , LCG ROOT MathLib Team *
6  * *
7  * *
8  **********************************************************************/
9 // Header file for GoFTest
10 
11 #ifndef ROOT_Math_GoFTest
12 #define ROOT_Math_GoFTest
13 
14 #include "Math/WrappedFunction.h"
15 #include "TMath.h"
16 
17 #include <memory>
18 
19 /*
20 */
21 
22 namespace ROOT {
23 
24  namespace Fit {
25  class BinData;
26  }
27 namespace Math {
28 
29 ///// @defgroup GoFClasses Goodness of Fit Statistical Tests Tools
30 
31 /*
32  Class for Goodness of Fit tests implementing the Anderson-Darling and Kolmogorov-Smirnov 1- and 2-Samples Goodness of Fit Tests.
33  @ingroup MathCore
34 
35  */
36 
37 
38 class GoFTest {
39 public:
40 
41  enum EDistribution { // H0 distributions, for use only with 1-sample tests
42  kUndefined, // Default value for the non-templated 1-sample test. Set with SetDistribution
43  kUserDefined, // For internal use only within the class's template constructor
46  kExponential
47  };
48 
49  enum EUserDistribution { // User input distribution option
51  kPDF // Default value
52  };
53 
54  enum ETestType { // Goodness of Fit test types, for use with the class's function-call operators (operator()) as a shorthand for the built-in methods
55  kAD, // Anderson-Darling Test. Default value
56  kAD2s, // Anderson-Darling 2-Samples Test
57  kKS, // Kolmogorov-Smirnov Test
58  kKS2s // Kolmogorov-Smirnov 2-Samples Test
59  };
60 
61  /* Constructor for use only with 2-samples tests */
62  GoFTest(UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2);
63 
64  /* Constructor for use only with 1-sample tests with a specified distribution */
65  GoFTest(UInt_t sampleSize, const Double_t* sample, EDistribution dist = kUndefined);
66 
67  /* Templated constructor for use only with 1-sample tests with a user-specified distribution */
68  template<class Dist>
69  GoFTest(UInt_t sampleSize, const Double_t* sample, Dist& dist, EUserDistribution userDist = kPDF,
70  Double_t xmin = 1, Double_t xmax = 0)
71  {
72  Instantiate(sample, sampleSize);
73  SetUserDistribution<Dist>(dist, userDist, xmin, xmax);
74  }
75 
76  /* Non-template overload of the constructor above, taking the user distribution through the IGenFunction interface */
77  GoFTest(UInt_t sampleSize, const Double_t* sample, const IGenFunction& dist, EUserDistribution userDist = kPDF,
78  Double_t xmin = 1, Double_t xmax = 0)
79  {
80  Instantiate(sample, sampleSize);
81  SetUserDistribution(dist, userDist, xmin, xmax);
82  }
83 
84  /* Sets the user input distribution function for 1-sample tests. The callable is wrapped in a WrappedFunction before being passed to SetDistributionFunction. */
85  template<class Dist>
87  WrappedFunction<Dist&> wdist(dist);
88  SetDistributionFunction(wdist, userDist, xmin, xmax);
89  }
90 
91  /* Overload taking an IGenFunction to set the user input distribution for 1-sample tests */
93  SetDistributionFunction(dist, userDist, xmin, xmax);
94  }
95 
96  /* Sets the user input distribution as a probability density function for 1-sample tests */
97  template<class Dist>
98  void SetUserPDF(Dist& pdf, Double_t xmin = 1, Double_t xmax = 0) {
99  SetUserDistribution<Dist>(pdf, kPDF, xmin, xmax);
100  }
101 
102  /* Overload taking an IGenFunction to set the user input distribution as a probability density function for 1-sample tests */
103  void SetUserPDF(const IGenFunction& pdf, Double_t xmin = 1, Double_t xmax = 0) {
104  SetUserDistribution(pdf, kPDF, xmin, xmax);
105  }
106 
107  /* Sets the user input distribution as a cumulative distribution function for 1-sample tests
108  The CDF must return zero
109  */
110  template<class Dist>
112  SetUserDistribution<Dist>(cdf, kCDF, xmin, xmax);
113  }
114 
115  /* Overload taking an IGenFunction to set the user input distribution as a cumulative distribution function for 1-sample tests */
117  SetUserDistribution(cdf, kCDF, xmin, xmax);
118  }
119 
120 
121  /* Sets the distribution for the predefined distribution types */
122  void SetDistribution(EDistribution dist);
123 
124 
125  virtual ~GoFTest();
126 
127 /*
128  The Anderson-Darling K-Sample Test algorithm is described and taken from
129  http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andeksam.htm
130  and described and taken from
131  (1) Scholz F.W., Stephens M.A. (1987), K-sample Anderson-Darling Tests, Journal of the American Statistical Association, 82, 918–924. (2-samples variant implemented)
132 */ void AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
133  Double_t AndersonDarling2SamplesTest(const Char_t* option = "p") const; // Returns the p-value by default; option "t" returns the test statistic value "A2"
134 
135 /*
136  The Anderson-Darling 1-Sample Test algorithm for a specific distribution is described at
137  http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andedarl.htm
138  and described and taken from (2)
139  Marsaglia J.C.W., Marsaglia G. (2004), Evaluating the Anderson-Darling Distribution, Journal of Statistical Software, Volume 09, Issue i02.
140  and described and taken from (3)
141  Lewis P.A.W. (1961), The Annals of Mathematical Statistics, Distribution of the Anderson-Darling Statistic, Volume 32, Number 4, 1118-1124.
142 */ void AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const;
143  Double_t AndersonDarlingTest(const Char_t* option = "p") const; // Returns the p-value by default; option "t" returns the test statistic value "A2"
144 
145 /*
146  The Kolmogorov-Smirnov 2-Samples Test algorithm is described at
147  http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ks2samp.htm
148  and described and taken from
149  http://root.cern.ch/root/html/TMath.html#TMath:KolmogorovTest
150 */ void KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const;
151  Double_t KolmogorovSmirnov2SamplesTest(const Char_t* option = "p") const; // Returns the p-value by default; option "t" returns the test statistic value "Dn"
152 
153 /*
154  The Kolmogorov-Smirnov 1-Sample Test algorithm for a specific distribution is described at
155  http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/kstest.htm
156  and described and taken from (4)
157  Press W. H., Teukolsky S.A., Vetterling W.T., Flannery B.P. (2007), Numerical Recipes - The Art of Scientific Computing (Third Edition), Cambridge University Press
158 */ void KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const;
159  Double_t KolmogorovSmirnovTest(const Char_t* option = "p") const; // Returns the p-value by default; option "t" returns the test statistic value "Dn"
160 
161  // The class's function-call operators: select a test through an ETestType value
162  void operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const;
163 
164  // Defaults to the Anderson-Darling 1-Sample Test and returns its p-value; option "t" returns the test statistic value
165  // specific to the test type
166  Double_t operator()(ETestType test = kAD, const Char_t* option = "p") const;
167 
168  // Computation of the K-Sample Anderson-Darling Test's p-value as described in (1)
169  // given a normalized test statistic. The first variant described in the paper is used
170  static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2 );
171 
172  // Computes the 2-Sample Anderson-Darling test for binned data
173  static void AndersonDarling2SamplesTest(const ROOT::Fit::BinData & data1, const ROOT::Fit::BinData & data2, Double_t& pvalue, Double_t& testStat);
174 
175 private:
176 
177  GoFTest(); // Disallowed default constructor
178  GoFTest(GoFTest& gof); // Disallowed copy constructor
179  GoFTest operator=(GoFTest& gof); // Disallowed assignment operator
180 
181  std::unique_ptr<IGenFunction> fCDF;
182 
183 
185 
188 
189  std::vector<Double_t> fCombinedSamples;
190 
191  std::vector<std::vector<Double_t> > fSamples;
192 
194 
195  void SetCDF();
196  void SetDistributionFunction(const IGenFunction& cdf, Bool_t isPDF, Double_t xmin, Double_t xmax);
197 
198  void Instantiate(const Double_t* sample, UInt_t sampleSize);
199 
200 
201  Double_t LogNormalCDF(Double_t x) const;
202  Double_t GaussianCDF(Double_t x) const;
203  Double_t ExponentialCDF(Double_t x) const;
204 
205  static Double_t GetSigmaN(const std::vector<UInt_t> & ns, UInt_t N); // Computation of sigma_N as described in (1)
206 
207  static Double_t InterpolatePValues(int nsamples,Double_t A2); // Linear interpolation used in GoFTest::PValueADKSamples
208 
209 
210  Double_t PValueAD1Sample(Double_t A2) const; // Computation of the 1-Sample Anderson-Darling Test's p-value
211 
212  void LogSample(); // Applies the logarithm to the sample when the specified distribution to test is LogNormal
213 
214  void SetSamples(std::vector<const Double_t*> samples, const std::vector<UInt_t> samplesSizes);
215 
216  void SetParameters(); // Sets the estimated mean and standard-deviation from the samples
217 }; // end GoFTest class
218 
219 
220 } // Math namespace
221 } // ROOT namespace
222 #endif
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
Bool_t fTestSampleFromH0
Definition: GoFTest.h:193
Double_t fMean
Definition: GoFTest.h:186
float xmin
Definition: THbookFile.cxx:93
Interface (abstract class) for generic function objects of one dimension. Provides a method to evalua...
Definition: IFunction.h:133
This namespace contains pre-defined functions to be used in conjunction with TExecutor::Map and TExecu...
Definition: StringConv.hxx:21
std::vector< Double_t > fCombinedSamples
Definition: GoFTest.h:189
#define N
Definition: test.py:1
void SetUserCDF(Dist &cdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:111
bool Bool_t
Definition: RtypesCore.h:59
Template class to wrap any C++ callable object which takes one argument i.e.
int sampleSize
Definition: unuranDistr.cxx:34
void SetParameters(TFitEditor::FuncParams_t &pars, TF1 *func)
Restore the parameters from pars into the function.
Definition: TFitEditor.cxx:287
Double_t x[n]
Definition: legend1.C:17
void SetUserDistribution(const IGenFunction &dist, GoFTest::EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:92
double cdf(double *x, double *p)
Definition: unuranDistr.cxx:44
EDistribution fDist
Definition: GoFTest.h:184
std::unique_ptr< IGenFunction > fCDF
Definition: GoFTest.h:181
double Dist(void *xp, void *yp)
unsigned int UInt_t
Definition: RtypesCore.h:42
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
Definition: BinData.h:61
Double_t fSigma
Definition: GoFTest.h:187
float xmax
Definition: THbookFile.cxx:93
std::vector< std::vector< Double_t > > fSamples
Definition: GoFTest.h:191
double Double_t
Definition: RtypesCore.h:55
TFitResultPtr Fit(FitObject *h1, TF1 *f1, Foption_t &option, const ROOT::Math::MinimizerOptions &moption, const char *goption, ROOT::Fit::DataRange &range)
Definition: HFitImpl.cxx:134
Namespace for new Math classes and functions.
GoFTest(UInt_t sampleSize, const Double_t *sample, const IGenFunction &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:77
char Char_t
Definition: RtypesCore.h:29
void SetUserPDF(Dist &pdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:98
void SetUserCDF(const IGenFunction &cdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:116
GoFTest(UInt_t sampleSize, const Double_t *sample, Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:69
void SetUserPDF(const IGenFunction &pdf, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:103
void SetUserDistribution(Dist &dist, EUserDistribution userDist=kPDF, Double_t xmin=1, Double_t xmax=0)
Definition: GoFTest.h:86