Logo ROOT  
Reference Guide
 
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
Loading...
Searching...
No Matches
goftest.C File Reference

Detailed Description

View in nbviewer Open in SWAN
GoFTest tutorial macro

Using the Anderson-Darling and Kolmogorov-Smirnov goodness-of-fit tests, a 1-sample test is performed comparing data with a log-normal distribution, and a 2-sample test is done comparing two Gaussian data sets.

TEST with LANDAU distribution: OK ( pvalues = 0.777278 )
#include <cassert>
#include "TCanvas.h"
#include "TPaveText.h"
#include "TH1.h"
#include "TF1.h"
#include "Math/GoFTest.h"
#include "Math/Functor.h"
#include "TRandom3.h"
#include "Math/DistFunc.h"
// need to use Functor1D
double landau(double x) {
}
void goftest() {
// ------------------------------------------------------------------------
// Case 1: Create logNormal random sample
UInt_t nEvents1 = 1000;
//ROOT::Math::Random<ROOT::Math::GSLRngMT> r;
TF1 * f1 = new TF1("logNormal","ROOT::Math::lognormal_pdf(x,[0],[1])",0,500);
// set the lognormal parameters (m and s)
f1->SetParameters(4.0,1.0);
f1->SetNpx(1000);
Double_t* sample1 = new Double_t[nEvents1];
TH1D* h1smp = new TH1D("h1smp", "LogNormal distribution histogram", 100, 0, 500);
h1smp->SetStats(kFALSE);
for (UInt_t i = 0; i < nEvents1; ++i) {
//Double_t data = f1->GetRandom();
sample1[i] = data;
h1smp->Fill(data);
}
// normalize correctly the histogram using the entries inside
h1smp->Scale( ROOT::Math::lognormal_cdf(500.,4.,1) / nEvents1, "width");
TCanvas* c = new TCanvas("c","1-Sample and 2-Samples GoF Tests");
c->Divide(1, 2);
TPad * pad = (TPad *)c->cd(1);
h1smp->Draw();
pad->SetLogy();
f1->SetNpx(100); // use same points as histo for drawing
f1->Draw("SAME");
// -----------------------------------------
// Create GoFTest object
//----------------------------------------------------
// Possible calls for the Anderson - DarlingTest test
// a) Returning the Anderson-Darling standardized test statistic
Double_t A2_1 = goftest_1-> AndersonDarlingTest("t");
Double_t A2_2 = (*goftest_1)(ROOT::Math::GoFTest::kAD, "t");
assert(A2_1 == A2_2);
// b) Returning the p-value for the Anderson-Darling test statistic
Double_t pvalueAD_1 = goftest_1-> AndersonDarlingTest(); // p-value is the default choice
Double_t pvalueAD_2 = (*goftest_1)(); // p-value and Anderson - Darling Test are the default choices
assert(pvalueAD_1 == pvalueAD_2);
// Rebuild the test using the default 1-sample construtor
delete goftest_1;
goftest_1 = new ROOT::Math::GoFTest(nEvents1, sample1 ); // User must then input a distribution type option
//--------------------------------------------------
// Possible calls for the Kolmogorov - Smirnov test
// a) Returning the Kolmogorov-Smirnov standardized test statistic
Double_t Dn_1 = goftest_1-> KolmogorovSmirnovTest("t");
Double_t Dn_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS, "t");
assert(Dn_1 == Dn_2);
// b) Returning the p-value for the Kolmogorov-Smirnov test statistic
Double_t pvalueKS_1 = goftest_1-> KolmogorovSmirnovTest();
Double_t pvalueKS_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS);
assert(pvalueKS_1 == pvalueKS_2);
// Valid but incorrect calls for the 2-samples methods of the 1-samples constructed goftest_1
#ifdef TEST_ERROR_MESSAGE
Double_t A2 = (*goftest_1)(ROOT::Math::GoFTest::kAD2s, "t"); // Issues error message
Double_t pvalueKS = (*goftest_1)(ROOT::Math::GoFTest::kKS2s); // Issues error message
assert(A2 == pvalueKS);
#endif
TPaveText* pt1 = new TPaveText(0.58, 0.6, 0.88, 0.80, "brNDC");
Char_t str1[50];
sprintf(str1, "p-value for A-D 1-smp test: %f", pvalueAD_1);
pt1->AddText(str1);
pt1->SetFillColor(18);
pt1->SetTextFont(20);
pt1->SetTextColor(4);
Char_t str2[50];
sprintf(str2, "p-value for K-S 1-smp test: %f", pvalueKS_1);
pt1->AddText(str2);
pt1->Draw();
// ------------------------------------------------------------------------
// Case 2: Create Gaussian random samples
UInt_t nEvents2 = 2000;
Double_t* sample2 = new Double_t[nEvents2];
TH1D* h2smps_1 = new TH1D("h2smps_1", "Gaussian distribution histograms", 100, 0, 500);
h2smps_1->SetStats(kFALSE);
TH1D* h2smps_2 = new TH1D("h2smps_2", "Gaussian distribution histograms", 100, 0, 500);
h2smps_2->SetStats(kFALSE);
for (UInt_t i = 0; i < nEvents1; ++i) {
Double_t data = r.Gaus(300, 50);
sample1[i] = data;
h2smps_1->Fill(data);
}
h2smps_1->Scale(1. / nEvents1, "width");
c->cd(2);
h2smps_1->Draw();
h2smps_1->SetLineColor(kBlue);
for (UInt_t i = 0; i < nEvents2; ++i) {
Double_t data = r.Gaus(300, 50);
sample2[i] = data;
h2smps_2->Fill(data);
}
h2smps_2->Scale(1. / nEvents2, "width");
h2smps_2->Draw("SAME");
h2smps_2->SetLineColor(kRed);
// -----------------------------------------
// Create GoFTest object
ROOT::Math::GoFTest* goftest_2 = new ROOT::Math::GoFTest(nEvents1, sample1, nEvents2, sample2);
//----------------------------------------------------
// Possible calls for the Anderson - DarlingTest test
// a) Returning the Anderson-Darling standardized test statistic
A2_1 = goftest_2->AndersonDarling2SamplesTest("t");
A2_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s, "t");
assert(A2_1 == A2_2);
// b) Returning the p-value for the Anderson-Darling test statistic
pvalueAD_1 = goftest_2-> AndersonDarling2SamplesTest(); // p-value is the default choice
pvalueAD_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s); // p-value is the default choices
assert(pvalueAD_1 == pvalueAD_2);
//--------------------------------------------------
// Possible calls for the Kolmogorov - Smirnov test
// a) Returning the Kolmogorov-Smirnov standardized test statistic
Dn_1 = goftest_2-> KolmogorovSmirnov2SamplesTest("t");
Dn_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s, "t");
assert(Dn_1 == Dn_2);
// b) Returning the p-value for the Kolmogorov-Smirnov test statistic
pvalueKS_1 = goftest_2-> KolmogorovSmirnov2SamplesTest();
pvalueKS_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s);
assert(pvalueKS_1 == pvalueKS_2);
#ifdef TEST_ERROR_MESSAGE
/* Valid but incorrect calls for the 1-sample methods of the 2-samples constucted goftest_2 */
A2 = (*goftest_2)(ROOT::Math::GoFTest::kAD, "t"); // Issues error message
pvalueKS = (*goftest_2)(ROOT::Math::GoFTest::kKS); // Issues error message
assert(A2 == pvalueKS);
#endif
TPaveText* pt2 = new TPaveText(0.13, 0.6, 0.43, 0.8, "brNDC");
sprintf(str1, "p-value for A-D 2-smps test: %f", pvalueAD_1);
pt2->AddText(str1);
pt2->SetFillColor(18);
pt2->SetTextFont(20);
pt2->SetTextColor(4);
sprintf(str2, "p-value for K-S 2-smps test: %f", pvalueKS_1);
pt2-> AddText(str2);
pt2-> Draw();
// ------------------------------------------------------------------------
// Case 3: Create Landau random sample
UInt_t nEvents3 = 1000;
Double_t* sample3 = new Double_t[nEvents3];
for (UInt_t i = 0; i < nEvents3; ++i) {
Double_t data = r.Landau();
sample3[i] = data;
}
// ------------------------------------------
// Create GoFTest objects
//
// Possible constructors for the user input distribution
// a) User input PDF
double minimum = 3*TMath::MinElement(nEvents3, sample3);
double maximum = 3*TMath::MaxElement(nEvents3, sample3);
ROOT::Math::GoFTest* goftest_3a = new ROOT::Math::GoFTest(nEvents3, sample3, f, ROOT::Math::GoFTest::kPDF, minimum,maximum); // need to specify am interval
// b) User input CDF
ROOT::Math::GoFTest* goftest_3b = new ROOT::Math::GoFTest(nEvents3, sample3, fI, ROOT::Math::GoFTest::kCDF,minimum,maximum);
// Returning the p-value for the Anderson-Darling test statistic
pvalueAD_1 = goftest_3a-> AndersonDarlingTest(); // p-value is the default choice
pvalueAD_2 = (*goftest_3b)(); // p-value and Anderson - Darling Test are the default choices
// Checking consistency between both tests
std::cout << " \n\nTEST with LANDAU distribution:\t";
if (TMath::Abs(pvalueAD_1 - pvalueAD_2) > 1.E-1 * pvalueAD_2) {
std::cout << "FAILED " << std::endl;
Error("goftest","Error in comparing testing using Landau and Landau CDF");
std::cerr << " pvalues are " << pvalueAD_1 << " " << pvalueAD_2 << std::endl;
}
else
std::cout << "OK ( pvalues = " << pvalueAD_2 << " )" << std::endl;
}
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
char Char_t
Definition RtypesCore.h:37
unsigned int UInt_t
Definition RtypesCore.h:46
constexpr Bool_t kFALSE
Definition RtypesCore.h:101
double Double_t
Definition RtypesCore.h:59
@ kRed
Definition Rtypes.h:66
@ kBlue
Definition Rtypes.h:66
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Definition TError.cxx:197
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
R__EXTERN TRandom * gRandom
Definition TRandom.h:62
Functor1D class for one-dimensional functions.
Definition Functor.h:578
GoFTest class implementing the 1 sample and 2 sample goodness of fit tests for uni-variate distributi...
Definition GoFTest.h:65
@ kLogNormal
Lognormal distribution with default meanlog=0, sigmalog=1.
Definition GoFTest.h:74
@ kKS
Kolmogorov-Smirnov Test.
Definition GoFTest.h:88
@ kKS2s
Kolmogorov-Smirnov 2-Samples Test.
Definition GoFTest.h:89
@ kAD2s
Anderson-Darling 2-Samples Test.
Definition GoFTest.h:87
@ kPDF
Input distribution is a PDF : probability density function.
Definition GoFTest.h:81
void SetDistribution(EDistribution dist, const std::vector< double > &distParams={})
Sets the distribution for the predefined distribution types and optionally its parameters for 1-sampl...
Definition GoFTest.cxx:123
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Performs the Anderson-Darling 2-Sample Test.
Definition GoFTest.cxx:645
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition TAttFill.h:37
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition TAttLine.h:40
virtual void SetTextColor(Color_t tcolor=1)
Set the text color.
Definition TAttText.h:44
virtual void SetTextFont(Font_t tfont=62)
Set the text font.
Definition TAttText.h:46
The Canvas class.
Definition TCanvas.h:23
1-Dim function class
Definition TF1.h:213
virtual void SetNpx(Int_t npx=100)
Set the number of points used to draw the function.
Definition TF1.cxx:3431
void Draw(Option_t *option="") override
Draw this function with its current attributes.
Definition TF1.cxx:1334
virtual void SetParameters(const Double_t *params)
Definition TF1.h:649
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:620
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition TH1.cxx:3338
void Draw(Option_t *option="") override
Draw this histogram with options.
Definition TH1.cxx:3060
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition TH1.cxx:6586
virtual void SetStats(Bool_t stats=kTRUE)
Set statistics option on/off.
Definition TH1.cxx:8856
The most important graphics class in the ROOT system.
Definition TPad.h:28
void SetLogy(Int_t value=1) override
Set Lin/Log scale for Y.
Definition TPad.cxx:5934
TVirtualPad * cd(Int_t subpadnumber=0) override
Set Current pad.
Definition TPad.cxx:597
A Pave (see TPave) with text, lines or/and boxes inside.
Definition TPaveText.h:21
virtual TText * AddText(Double_t x1, Double_t y1, const char *label)
Add a new Text line to this pavetext at given coordinates.
void Draw(Option_t *option="") override
Draw this pavetext with its current attributes.
Random number generator class based on M.
Definition TRandom3.h:27
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition TRandom.cxx:274
double landau_pdf(double x, double xi=1, double x0=0)
Probability density function of the Landau distribution:
double lognormal_cdf(double x, double m, double s, double x0=0)
Cumulative distribution function of the lognormal distribution (lower tail).
Double_t x[n]
Definition legend1.C:17
TF1 * f1
Definition legend1.C:11
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Definition TMath.h:707
T MinElement(Long64_t n, const T *a)
Returns minimum of array a of length n.
Definition TMath.h:958
T MaxElement(Long64_t n, const T *a)
Returns maximum of array a of length n.
Definition TMath.h:966
Double_t LandauI(Double_t x)
Returns the value of the Landau distribution function at point x.
Definition TMath.cxx:2831
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:123
th1 Draw()
Author
Bartolomeu Rabacal

Definition in file goftest.C.