Logo ROOT   6.10/09
Reference Guide
goftest.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_math
3 /// \notebook
4 /// GoFTest tutorial macro
5 ///
6 /// Using Anderson-Darling and Kolmogorov-Smirnov goodness of fit tests
7 /// 1 sample test is performed comparing data with a log-normal distribution
8 /// and a 2 sample test is done comparing two gaussian data sets.
9 ///
10 /// \macro_image
11 /// \macro_output
12 /// \macro_code
13 ///
14 /// \author Bartolomeu Rabacal
15 
16 #include <cassert>
17 #include "TCanvas.h"
18 #include "TPaveText.h"
19 #include "TH1.h"
20 #include "TF1.h"
21 #include "Math/GoFTest.h"
22 #include "Math/Functor.h"
23 #include "TRandom3.h"
24 #include "Math/DistFunc.h"
25 
26 // need to use Functor1D
27 double landau(double x) {
28  return ROOT::Math::landau_pdf(x);
29 }
30 
31 void goftest() {
32 
33  // ------------------------------------------------------------------------
34  // Case 1: Create logNormal random sample
35 
36 
37  UInt_t nEvents1 = 1000;
38 
39  //ROOT::Math::Random<ROOT::Math::GSLRngMT> r;
40  TF1 * f1 = new TF1("logNormal","ROOT::Math::lognormal_pdf(x,[0],[1])",0,500);
41  // set the lognormal parameters (m and s)
42  f1->SetParameters(4.0,1.0);
43  f1->SetNpx(1000);
44 
45 
46  Double_t* sample1 = new Double_t[nEvents1];
47 
48  TH1D* h1smp = new TH1D("h1smp", "LogNormal distribution histogram", 100, 0, 500);
49  h1smp->SetStats(kFALSE);
50 
51  for (UInt_t i = 0; i < nEvents1; ++i) {
52  //Double_t data = f1->GetRandom();
53  Double_t data = gRandom->Gaus(4,1);
54  data = TMath::Exp(data);
55  sample1[i] = data;
56  h1smp->Fill(data);
57  }
58  // normalize correctly the histogram using the entries inside
59  h1smp->Scale( ROOT::Math::lognormal_cdf(500.,4.,1) / nEvents1, "width");
60 
61  TCanvas* c = new TCanvas("c","1-Sample and 2-Samples GoF Tests");
62  c->Divide(1, 2);
63  TPad * pad = (TPad *)c->cd(1);
64  h1smp->Draw();
65  h1smp->SetLineColor(kBlue);
66  pad->SetLogy();
67  f1->SetNpx(100); // use same points as histo for drawing
68  f1->SetLineColor(kRed);
69  f1->Draw("SAME");
70 
71  // -----------------------------------------
72  // Create GoFTest object
73 
74 
75  ROOT::Math::GoFTest* goftest_1 = new ROOT::Math::GoFTest(nEvents1, sample1, ROOT::Math::GoFTest::kLogNormal);
76  //----------------------------------------------------
77  // Possible calls for the Anderson - DarlingTest test
78  // a) Returning the Anderson-Darling standardized test statistic
79  Double_t A2_1 = goftest_1-> AndersonDarlingTest("t");
80  Double_t A2_2 = (*goftest_1)(ROOT::Math::GoFTest::kAD, "t");
81  assert(A2_1 == A2_2);
82 
83  // b) Returning the p-value for the Anderson-Darling test statistic
84  Double_t pvalueAD_1 = goftest_1-> AndersonDarlingTest(); // p-value is the default choice
85  Double_t pvalueAD_2 = (*goftest_1)(); // p-value and Anderson - Darling Test are the default choices
86  assert(pvalueAD_1 == pvalueAD_2);
87 
88  // Rebuild the test using the default 1-sample construtor
89  delete goftest_1;
90  goftest_1 = new ROOT::Math::GoFTest(nEvents1, sample1 ); // User must then input a distribution type option
92 
93  //--------------------------------------------------
94  // Possible calls for the Kolmogorov - Smirnov test
95  // a) Returning the Kolmogorov-Smirnov standardized test statistic
96  Double_t Dn_1 = goftest_1-> KolmogorovSmirnovTest("t");
97  Double_t Dn_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS, "t");
98  assert(Dn_1 == Dn_2);
99 
100  // b) Returning the p-value for the Kolmogorov-Smirnov test statistic
101  Double_t pvalueKS_1 = goftest_1-> KolmogorovSmirnovTest();
102  Double_t pvalueKS_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS);
103  assert(pvalueKS_1 == pvalueKS_2);
104 
105  // Valid but incorrect calls for the 2-samples methods of the 1-samples constructed goftest_1
106 #ifdef TEST_ERROR_MESSAGE
107  Double_t A2 = (*goftest_1)(ROOT::Math::GoFTest::kAD2s, "t"); // Issues error message
108  Double_t pvalueKS = (*goftest_1)(ROOT::Math::GoFTest::kKS2s); // Issues error message
109  assert(A2 == pvalueKS);
110 #endif
111 
112  TPaveText* pt1 = new TPaveText(0.58, 0.6, 0.88, 0.80, "brNDC");
113  Char_t str1[50];
114  sprintf(str1, "p-value for A-D 1-smp test: %f", pvalueAD_1);
115  pt1->AddText(str1);
116  pt1->SetFillColor(18);
117  pt1->SetTextFont(20);
118  pt1->SetTextColor(4);
119  Char_t str2[50];
120  sprintf(str2, "p-value for K-S 1-smp test: %f", pvalueKS_1);
121  pt1->AddText(str2);
122  pt1->Draw();
123 
124  // ------------------------------------------------------------------------
125  // Case 2: Create Gaussian random samples
126 
127  UInt_t nEvents2 = 2000;
128 
129  Double_t* sample2 = new Double_t[nEvents2];
130 
131  TH1D* h2smps_1 = new TH1D("h2smps_1", "Gaussian distribution histograms", 100, 0, 500);
132  h2smps_1->SetStats(kFALSE);
133 
134  TH1D* h2smps_2 = new TH1D("h2smps_2", "Gaussian distribution histograms", 100, 0, 500);
135  h2smps_2->SetStats(kFALSE);
136 
137  TRandom3 r;
138  for (UInt_t i = 0; i < nEvents1; ++i) {
139  Double_t data = r.Gaus(300, 50);
140  sample1[i] = data;
141  h2smps_1->Fill(data);
142  }
143  h2smps_1->Scale(1. / nEvents1, "width");
144  c->cd(2);
145  h2smps_1->Draw();
146  h2smps_1->SetLineColor(kBlue);
147 
148  for (UInt_t i = 0; i < nEvents2; ++i) {
149  Double_t data = r.Gaus(300, 50);
150  sample2[i] = data;
151  h2smps_2->Fill(data);
152  }
153  h2smps_2->Scale(1. / nEvents2, "width");
154  h2smps_2->Draw("SAME");
155  h2smps_2->SetLineColor(kRed);
156 
157  // -----------------------------------------
158  // Create GoFTest object
159 
160  ROOT::Math::GoFTest* goftest_2 = new ROOT::Math::GoFTest(nEvents1, sample1, nEvents2, sample2);
161 
162  //----------------------------------------------------
163  // Possible calls for the Anderson - DarlingTest test
164  // a) Returning the Anderson-Darling standardized test statistic
165  A2_1 = goftest_2->AndersonDarling2SamplesTest("t");
166  A2_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s, "t");
167  assert(A2_1 == A2_2);
168 
169  // b) Returning the p-value for the Anderson-Darling test statistic
170  pvalueAD_1 = goftest_2-> AndersonDarling2SamplesTest(); // p-value is the default choice
171  pvalueAD_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s); // p-value is the default choices
172  assert(pvalueAD_1 == pvalueAD_2);
173 
174  //--------------------------------------------------
175  // Possible calls for the Kolmogorov - Smirnov test
176  // a) Returning the Kolmogorov-Smirnov standardized test statistic
177  Dn_1 = goftest_2-> KolmogorovSmirnov2SamplesTest("t");
178  Dn_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s, "t");
179  assert(Dn_1 == Dn_2);
180 
181  // b) Returning the p-value for the Kolmogorov-Smirnov test statistic
182  pvalueKS_1 = goftest_2-> KolmogorovSmirnov2SamplesTest();
183  pvalueKS_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s);
184  assert(pvalueKS_1 == pvalueKS_2);
185 
186 #ifdef TEST_ERROR_MESSAGE
187  /* Valid but incorrect calls for the 1-sample methods of the 2-samples constucted goftest_2 */
188  A2 = (*goftest_2)(ROOT::Math::GoFTest::kAD, "t"); // Issues error message
189  pvalueKS = (*goftest_2)(ROOT::Math::GoFTest::kKS); // Issues error message
190  assert(A2 == pvalueKS);
191 #endif
192 
193  TPaveText* pt2 = new TPaveText(0.13, 0.6, 0.43, 0.8, "brNDC");
194  sprintf(str1, "p-value for A-D 2-smps test: %f", pvalueAD_1);
195  pt2->AddText(str1);
196  pt2->SetFillColor(18);
197  pt2->SetTextFont(20);
198  pt2->SetTextColor(4);
199  sprintf(str2, "p-value for K-S 2-smps test: %f", pvalueKS_1);
200  pt2-> AddText(str2);
201  pt2-> Draw();
202 
203  // ------------------------------------------------------------------------
204  // Case 3: Create Landau random sample
205 
206  UInt_t nEvents3 = 1000;
207 
208  Double_t* sample3 = new Double_t[nEvents3];
209  for (UInt_t i = 0; i < nEvents3; ++i) {
210  Double_t data = r.Landau();
211  sample3[i] = data;
212  }
213 
214  // ------------------------------------------
215  // Create GoFTest objects
216  //
217  // Possible constructors for the user input distribution
218 
219  // a) User input PDF
220  ROOT::Math::Functor1D f(&landau);
221  double minimum = 3*TMath::MinElement(nEvents3, sample3);
222  double maximum = 3*TMath::MaxElement(nEvents3, sample3);
223  ROOT::Math::GoFTest* goftest_3a = new ROOT::Math::GoFTest(nEvents3, sample3, f, ROOT::Math::GoFTest::kPDF, minimum,maximum); // need to specify am interval
224  // b) User input CDF
226  ROOT::Math::GoFTest* goftest_3b = new ROOT::Math::GoFTest(nEvents3, sample3, fI, ROOT::Math::GoFTest::kCDF,minimum,maximum);
227 
228 
229  // Returning the p-value for the Anderson-Darling test statistic
230  pvalueAD_1 = goftest_3a-> AndersonDarlingTest(); // p-value is the default choice
231 
232  pvalueAD_2 = (*goftest_3b)(); // p-value and Anderson - Darling Test are the default choices
233 
234  // Checking consistency between both tests
235  std::cout << " \n\nTEST with LANDAU distribution:\t";
236  if (TMath::Abs(pvalueAD_1 - pvalueAD_2) > 1.E-1 * pvalueAD_2) {
237  std::cout << "FAILED " << std::endl;
238  Error("goftest","Error in comparing testing using Landau and Landau CDF");
239  std::cerr << " pvalues are " << pvalueAD_1 << " " << pvalueAD_2 << std::endl;
240  }
241  else
242  std::cout << "OK ( pvalues = " << pvalueAD_2 << " )" << std::endl;
243 }
void SetDistribution(EDistribution dist)
Definition: GoFTest.cxx:123
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:5937
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3126
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:588
Random number generator class based on M.
Definition: TRandom3.h:27
virtual void Draw(Option_t *option="")
Draw this pavetext with its current attributes.
Definition: TPaveText.cxx:234
virtual void SetNpx(Int_t npx=100)
Set the number of points used to draw the function.
Definition: TF1.cxx:3202
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:235
Definition: Rtypes.h:56
TVirtualPad * cd(Int_t subpadnumber=0)
Set current canvas & pad.
Definition: TCanvas.cxx:679
virtual TText * AddText(Double_t x1, Double_t y1, const char *label)
Add a new Text line to this pavetext at given coordinates.
Definition: TPaveText.cxx:183
virtual void Draw(Option_t *option="")
Draw this function with its current attributes.
Definition: TF1.cxx:1087
Short_t Abs(Short_t d)
Definition: TMathBase.h:108
double lognormal_cdf(double x, double m, double s, double x0=0)
Cumulative distribution function of the lognormal distribution (lower tail).
virtual void SetTextFont(Font_t tfont=62)
Set the text font.
Definition: TAttText.h:45
Double_t x[n]
Definition: legend1.C:17
double landau_pdf(double x, double xi=1, double x0=0)
Probability density function of the Landau distribution: with where .
void Error(const char *location, const char *msgfmt,...)
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40
th1 Draw()
TRandom2 r(17)
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2851
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:37
unsigned int UInt_t
Definition: RtypesCore.h:42
The most important graphics class in the ROOT system.
Definition: TPad.h:29
Double_t LandauI(Double_t x)
Returns the value of the Landau distribution function at point x.
Definition: TMath.cxx:2767
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
tomato 1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:594
const Bool_t kFALSE
Definition: RtypesCore.h:92
The Canvas class.
Definition: TCanvas.h:31
Double_t Exp(Double_t x)
Definition: TMath.h:622
A Pave (see TPave) with text, lines or/and boxes inside.
Definition: TPaveText.h:21
double f(double x)
double Double_t
Definition: RtypesCore.h:55
T MaxElement(Long64_t n, const T *a)
Definition: TMath.h:837
char Char_t
Definition: RtypesCore.h:29
virtual void Divide(Int_t nx=1, Int_t ny=1, Float_t xmargin=0.01, Float_t ymargin=0.01, Int_t color=0)
Automatic pad generation by division.
Definition: TPad.cxx:1135
1-Dim function class
Definition: TF1.h:150
TF1 * f1
Definition: legend1.C:11
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:310
virtual void SetTextColor(Color_t tcolor=1)
Set the text color.
Definition: TAttText.h:43
Definition: Rtypes.h:56
Functor1D class for one-dimensional functions.
Definition: Functor.h:487
virtual Double_t Landau(Double_t mean=0, Double_t sigma=1)
Generate a random number following a Landau distribution with location parameter mu and scale paramet...
Definition: TRandom.cxx:340
virtual void SetStats(Bool_t stats=kTRUE)
Set statistics option on/off.
Definition: TH1.cxx:8103
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:644
T MinElement(Long64_t n, const T *a)
Definition: TMath.h:830
virtual void SetLogy(Int_t value=1)
Set Lin/Log scale for Y.
Definition: TPad.cxx:5780