Logo ROOT  
Reference Guide
goftest.C
Go to the documentation of this file.
1/// \file
2/// \ingroup tutorial_math
3/// \notebook
4/// GoFTest tutorial macro
5///
6/// Using Anderson-Darling and Kolmogorov-Smirnov goodness of fit tests
7/// 1 sample test is performed comparing data with a log-normal distribution
8/// and a 2 sample test is done comparing two gaussian data sets.
9///
10/// \macro_image
11/// \macro_output
12/// \macro_code
13///
14/// \author Bartolomeu Rabacal
15
16#include <cassert>
17#include "TCanvas.h"
18#include "TPaveText.h"
19#include "TH1.h"
20#include "TF1.h"
21#include "Math/GoFTest.h"
22#include "Math/Functor.h"
23#include "TRandom3.h"
24#include "Math/DistFunc.h"
25
26// need to use Functor1D
27double landau(double x) {
29}
30
31void goftest() {
32
33 // ------------------------------------------------------------------------
34 // Case 1: Create logNormal random sample
35
36
37 UInt_t nEvents1 = 1000;
38
39 //ROOT::Math::Random<ROOT::Math::GSLRngMT> r;
40 TF1 * f1 = new TF1("logNormal","ROOT::Math::lognormal_pdf(x,[0],[1])",0,500);
41 // set the lognormal parameters (m and s)
42 f1->SetParameters(4.0,1.0);
43 f1->SetNpx(1000);
44
45
46 Double_t* sample1 = new Double_t[nEvents1];
47
48 TH1D* h1smp = new TH1D("h1smp", "LogNormal distribution histogram", 100, 0, 500);
49 h1smp->SetStats(kFALSE);
50
51 for (UInt_t i = 0; i < nEvents1; ++i) {
52 //Double_t data = f1->GetRandom();
53 Double_t data = gRandom->Gaus(4,1);
55 sample1[i] = data;
56 h1smp->Fill(data);
57 }
58 // normalize correctly the histogram using the entries inside
59 h1smp->Scale( ROOT::Math::lognormal_cdf(500.,4.,1) / nEvents1, "width");
60
61 TCanvas* c = new TCanvas("c","1-Sample and 2-Samples GoF Tests");
62 c->Divide(1, 2);
63 TPad * pad = (TPad *)c->cd(1);
64 h1smp->Draw();
65 h1smp->SetLineColor(kBlue);
66 pad->SetLogy();
67 f1->SetNpx(100); // use same points as histo for drawing
69 f1->Draw("SAME");
70
71 // -----------------------------------------
72 // Create GoFTest object
73
74
76 //----------------------------------------------------
77 // Possible calls for the Anderson - DarlingTest test
78 // a) Returning the Anderson-Darling standardized test statistic
79 Double_t A2_1 = goftest_1-> AndersonDarlingTest("t");
80 Double_t A2_2 = (*goftest_1)(ROOT::Math::GoFTest::kAD, "t");
81 assert(A2_1 == A2_2);
82
83 // b) Returning the p-value for the Anderson-Darling test statistic
84 Double_t pvalueAD_1 = goftest_1-> AndersonDarlingTest(); // p-value is the default choice
85 Double_t pvalueAD_2 = (*goftest_1)(); // p-value and Anderson - Darling Test are the default choices
86 assert(pvalueAD_1 == pvalueAD_2);
87
88 // Rebuild the test using the default 1-sample construtor
89 delete goftest_1;
90 goftest_1 = new ROOT::Math::GoFTest(nEvents1, sample1 ); // User must then input a distribution type option
92
93 //--------------------------------------------------
94 // Possible calls for the Kolmogorov - Smirnov test
95 // a) Returning the Kolmogorov-Smirnov standardized test statistic
96 Double_t Dn_1 = goftest_1-> KolmogorovSmirnovTest("t");
97 Double_t Dn_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS, "t");
98 assert(Dn_1 == Dn_2);
99
100 // b) Returning the p-value for the Kolmogorov-Smirnov test statistic
101 Double_t pvalueKS_1 = goftest_1-> KolmogorovSmirnovTest();
102 Double_t pvalueKS_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS);
103 assert(pvalueKS_1 == pvalueKS_2);
104
105 // Valid but incorrect calls for the 2-samples methods of the 1-samples constructed goftest_1
106#ifdef TEST_ERROR_MESSAGE
107 Double_t A2 = (*goftest_1)(ROOT::Math::GoFTest::kAD2s, "t"); // Issues error message
108 Double_t pvalueKS = (*goftest_1)(ROOT::Math::GoFTest::kKS2s); // Issues error message
109 assert(A2 == pvalueKS);
110#endif
111
112 TPaveText* pt1 = new TPaveText(0.58, 0.6, 0.88, 0.80, "brNDC");
113 Char_t str1[50];
114 sprintf(str1, "p-value for A-D 1-smp test: %f", pvalueAD_1);
115 pt1->AddText(str1);
116 pt1->SetFillColor(18);
117 pt1->SetTextFont(20);
118 pt1->SetTextColor(4);
119 Char_t str2[50];
120 sprintf(str2, "p-value for K-S 1-smp test: %f", pvalueKS_1);
121 pt1->AddText(str2);
122 pt1->Draw();
123
124 // ------------------------------------------------------------------------
125 // Case 2: Create Gaussian random samples
126
127 UInt_t nEvents2 = 2000;
128
129 Double_t* sample2 = new Double_t[nEvents2];
130
131 TH1D* h2smps_1 = new TH1D("h2smps_1", "Gaussian distribution histograms", 100, 0, 500);
132 h2smps_1->SetStats(kFALSE);
133
134 TH1D* h2smps_2 = new TH1D("h2smps_2", "Gaussian distribution histograms", 100, 0, 500);
135 h2smps_2->SetStats(kFALSE);
136
137 TRandom3 r;
138 for (UInt_t i = 0; i < nEvents1; ++i) {
139 Double_t data = r.Gaus(300, 50);
140 sample1[i] = data;
141 h2smps_1->Fill(data);
142 }
143 h2smps_1->Scale(1. / nEvents1, "width");
144 c->cd(2);
145 h2smps_1->Draw();
146 h2smps_1->SetLineColor(kBlue);
147
148 for (UInt_t i = 0; i < nEvents2; ++i) {
149 Double_t data = r.Gaus(300, 50);
150 sample2[i] = data;
151 h2smps_2->Fill(data);
152 }
153 h2smps_2->Scale(1. / nEvents2, "width");
154 h2smps_2->Draw("SAME");
155 h2smps_2->SetLineColor(kRed);
156
157 // -----------------------------------------
158 // Create GoFTest object
159
160 ROOT::Math::GoFTest* goftest_2 = new ROOT::Math::GoFTest(nEvents1, sample1, nEvents2, sample2);
161
162 //----------------------------------------------------
163 // Possible calls for the Anderson - DarlingTest test
164 // a) Returning the Anderson-Darling standardized test statistic
165 A2_1 = goftest_2->AndersonDarling2SamplesTest("t");
166 A2_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s, "t");
167 assert(A2_1 == A2_2);
168
169 // b) Returning the p-value for the Anderson-Darling test statistic
170 pvalueAD_1 = goftest_2-> AndersonDarling2SamplesTest(); // p-value is the default choice
171 pvalueAD_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s); // p-value is the default choices
172 assert(pvalueAD_1 == pvalueAD_2);
173
174 //--------------------------------------------------
175 // Possible calls for the Kolmogorov - Smirnov test
176 // a) Returning the Kolmogorov-Smirnov standardized test statistic
177 Dn_1 = goftest_2-> KolmogorovSmirnov2SamplesTest("t");
178 Dn_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s, "t");
179 assert(Dn_1 == Dn_2);
180
181 // b) Returning the p-value for the Kolmogorov-Smirnov test statistic
182 pvalueKS_1 = goftest_2-> KolmogorovSmirnov2SamplesTest();
183 pvalueKS_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s);
184 assert(pvalueKS_1 == pvalueKS_2);
185
186#ifdef TEST_ERROR_MESSAGE
187 /* Valid but incorrect calls for the 1-sample methods of the 2-samples constucted goftest_2 */
188 A2 = (*goftest_2)(ROOT::Math::GoFTest::kAD, "t"); // Issues error message
189 pvalueKS = (*goftest_2)(ROOT::Math::GoFTest::kKS); // Issues error message
190 assert(A2 == pvalueKS);
191#endif
192
193 TPaveText* pt2 = new TPaveText(0.13, 0.6, 0.43, 0.8, "brNDC");
194 sprintf(str1, "p-value for A-D 2-smps test: %f", pvalueAD_1);
195 pt2->AddText(str1);
196 pt2->SetFillColor(18);
197 pt2->SetTextFont(20);
198 pt2->SetTextColor(4);
199 sprintf(str2, "p-value for K-S 2-smps test: %f", pvalueKS_1);
200 pt2-> AddText(str2);
201 pt2-> Draw();
202
203 // ------------------------------------------------------------------------
204 // Case 3: Create Landau random sample
205
206 UInt_t nEvents3 = 1000;
207
208 Double_t* sample3 = new Double_t[nEvents3];
209 for (UInt_t i = 0; i < nEvents3; ++i) {
210 Double_t data = r.Landau();
211 sample3[i] = data;
212 }
213
214 // ------------------------------------------
215 // Create GoFTest objects
216 //
217 // Possible constructors for the user input distribution
218
219 // a) User input PDF
220 ROOT::Math::Functor1D f(&landau);
221 double minimum = 3*TMath::MinElement(nEvents3, sample3);
222 double maximum = 3*TMath::MaxElement(nEvents3, sample3);
223 ROOT::Math::GoFTest* goftest_3a = new ROOT::Math::GoFTest(nEvents3, sample3, f, ROOT::Math::GoFTest::kPDF, minimum,maximum); // need to specify am interval
224 // b) User input CDF
226 ROOT::Math::GoFTest* goftest_3b = new ROOT::Math::GoFTest(nEvents3, sample3, fI, ROOT::Math::GoFTest::kCDF,minimum,maximum);
227
228
229 // Returning the p-value for the Anderson-Darling test statistic
230 pvalueAD_1 = goftest_3a-> AndersonDarlingTest(); // p-value is the default choice
231
232 pvalueAD_2 = (*goftest_3b)(); // p-value and Anderson - Darling Test are the default choices
233
234 // Checking consistency between both tests
235 std::cout << " \n\nTEST with LANDAU distribution:\t";
236 if (TMath::Abs(pvalueAD_1 - pvalueAD_2) > 1.E-1 * pvalueAD_2) {
237 std::cout << "FAILED " << std::endl;
238 Error("goftest","Error in comparing testing using Landau and Landau CDF");
239 std::cerr << " pvalues are " << pvalueAD_1 << " " << pvalueAD_2 << std::endl;
240 }
241 else
242 std::cout << "OK ( pvalues = " << pvalueAD_2 << " )" << std::endl;
243}
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
char Char_t
Definition: RtypesCore.h:37
const Bool_t kFALSE
Definition: RtypesCore.h:101
unsigned int UInt_t
Definition: RtypesCore.h:46
double Double_t
Definition: RtypesCore.h:59
@ kRed
Definition: Rtypes.h:66
@ kBlue
Definition: Rtypes.h:66
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition: TError.cxx:188
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
Functor1D class for one-dimensional functions.
Definition: Functor.h:506
GoFTest class implementing the 1 sample and 2 sample goodness of fit tests for uni-variate distributi...
Definition: GoFTest.h:65
@ kLogNormal
Gaussian distribution with default mean=0, sigma=1.
Definition: GoFTest.h:74
@ kKS
Anderson-Darling 2-Samples Test.
Definition: GoFTest.h:88
@ kKS2s
Kolmogorov-Smirnov Test.
Definition: GoFTest.h:89
@ kAD2s
Anderson-Darling Test. Default value.
Definition: GoFTest.h:87
@ kPDF
Input distribution is a CDF : cumulative distribution function.
Definition: GoFTest.h:81
void SetDistribution(EDistribution dist, const std::vector< double > &distParams={})
Sets the distribution for the predefined distribution types and optionally its parameters for 1-sampl...
Definition: GoFTest.cxx:123
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Performs the Anderson-Darling 2-Sample Test.
Definition: GoFTest.cxx:645
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:37
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40
virtual void SetTextColor(Color_t tcolor=1)
Set the text color.
Definition: TAttText.h:44
virtual void SetTextFont(Font_t tfont=62)
Set the text font.
Definition: TAttText.h:46
The Canvas class.
Definition: TCanvas.h:23
1-Dim function class
Definition: TF1.h:213
virtual void SetNpx(Int_t npx=100)
Set the number of points used to draw the function.
Definition: TF1.cxx:3468
void Draw(Option_t *option="") override
Draw this function with its current attributes.
Definition: TF1.cxx:1334
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:649
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:620
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3338
void Draw(Option_t *option="") override
Draw this histogram with options.
Definition: TH1.cxx:3060
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:6586
virtual void SetStats(Bool_t stats=kTRUE)
Set statistics option on/off.
Definition: TH1.cxx:8857
The most important graphics class in the ROOT system.
Definition: TPad.h:28
void SetLogy(Int_t value=1) override
Set Lin/Log scale for Y.
Definition: TPad.cxx:5923
A Pave (see TPave) with text, lines or/and boxes inside.
Definition: TPaveText.h:21
virtual TText * AddText(Double_t x1, Double_t y1, const char *label)
Add a new Text line to this pavetext at given coordinates.
Definition: TPaveText.cxx:187
void Draw(Option_t *option="") override
Draw this pavetext with its current attributes.
Definition: TPaveText.cxx:238
Random number generator class based on M.
Definition: TRandom3.h:27
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:274
double landau_pdf(double x, double xi=1, double x0=0)
Probability density function of the Landau distribution:
double lognormal_cdf(double x, double m, double s, double x0=0)
Cumulative distribution function of the lognormal distribution (lower tail).
Double_t x[n]
Definition: legend1.C:17
TF1 * f1
Definition: legend1.C:11
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Definition: TMath.h:707
T MinElement(Long64_t n, const T *a)
Returns minimum of array a of length n.
Definition: TMath.h:958
T MaxElement(Long64_t n, const T *a)
Returns maximum of array a of length n.
Definition: TMath.h:966
Double_t LandauI(Double_t x)
Returns the value of the Landau distribution function at point x.
Definition: TMath.cxx:2831
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition: TMathBase.h:123
th1 Draw()