Loading [MathJax]/extensions/tex2jax.js
Logo ROOT  
Reference Guide
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
goftest.C File Reference

Detailed Description

View in nbviewer Open in SWAN GoFTest tutorial macro

Using the Anderson-Darling and Kolmogorov-Smirnov goodness-of-fit tests, a 1-sample test is performed comparing data with a log-normal distribution, and a 2-sample test is performed comparing two Gaussian data sets.

TEST with LANDAU distribution: OK ( pvalues = 0.777278 )
#include <cassert>
#include <cstdio>
#include <iostream>
#include "TCanvas.h"
#include "TPaveText.h"
#include "TH1.h"
#include "TF1.h"
#include "TMath.h"
#include "TRandom3.h"
#include "Math/GoFTest.h"
#include "Math/Functor.h"
#include "Math/DistFunc.h"
// need to use Functor1D
double landau(double x) {
}
void goftest() {
// ------------------------------------------------------------------------
// Case 1: Create logNormal random sample
UInt_t nEvents1 = 1000;
//ROOT::Math::Random<ROOT::Math::GSLRngMT> r;
TF1 * f1 = new TF1("logNormal","ROOT::Math::lognormal_pdf(x,[0],[1])",0,500);
// set the lognormal parameters (m and s)
f1->SetParameters(4.0,1.0);
f1->SetNpx(1000);
Double_t* sample1 = new Double_t[nEvents1];
TH1D* h1smp = new TH1D("h1smp", "LogNormal distribution histogram", 100, 0, 500);
h1smp->SetStats(kFALSE);
for (UInt_t i = 0; i < nEvents1; ++i) {
//Double_t data = f1->GetRandom();
Double_t data = gRandom->Gaus(4,1);
data = TMath::Exp(data);
sample1[i] = data;
h1smp->Fill(data);
}
// normalize correctly the histogram using the entries inside
h1smp->Scale( ROOT::Math::lognormal_cdf(500.,4.,1) / nEvents1, "width");
TCanvas* c = new TCanvas("c","1-Sample and 2-Samples GoF Tests");
c->Divide(1, 2);
TPad * pad = (TPad *)c->cd(1);
h1smp->Draw();
pad->SetLogy();
f1->SetNpx(100); // use same points as histo for drawing
f1->Draw("SAME");
// -----------------------------------------
// Create GoFTest object
//----------------------------------------------------
// Possible calls for the Anderson - DarlingTest test
// a) Returning the Anderson-Darling standardized test statistic
Double_t A2_1 = goftest_1-> AndersonDarlingTest("t");
Double_t A2_2 = (*goftest_1)(ROOT::Math::GoFTest::kAD, "t");
assert(A2_1 == A2_2);
// b) Returning the p-value for the Anderson-Darling test statistic
Double_t pvalueAD_1 = goftest_1-> AndersonDarlingTest(); // p-value is the default choice
Double_t pvalueAD_2 = (*goftest_1)(); // p-value and Anderson - Darling Test are the default choices
assert(pvalueAD_1 == pvalueAD_2);
// Rebuild the test using the default 1-sample construtor
delete goftest_1;
goftest_1 = new ROOT::Math::GoFTest(nEvents1, sample1 ); // User must then input a distribution type option
//--------------------------------------------------
// Possible calls for the Kolmogorov - Smirnov test
// a) Returning the Kolmogorov-Smirnov standardized test statistic
Double_t Dn_1 = goftest_1-> KolmogorovSmirnovTest("t");
Double_t Dn_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS, "t");
assert(Dn_1 == Dn_2);
// b) Returning the p-value for the Kolmogorov-Smirnov test statistic
Double_t pvalueKS_1 = goftest_1-> KolmogorovSmirnovTest();
Double_t pvalueKS_2 = (*goftest_1)(ROOT::Math::GoFTest::kKS);
assert(pvalueKS_1 == pvalueKS_2);
// Valid but incorrect calls for the 2-samples methods of the 1-samples constructed goftest_1
#ifdef TEST_ERROR_MESSAGE
Double_t A2 = (*goftest_1)(ROOT::Math::GoFTest::kAD2s, "t"); // Issues error message
Double_t pvalueKS = (*goftest_1)(ROOT::Math::GoFTest::kKS2s); // Issues error message
assert(A2 == pvalueKS);
#endif
TPaveText* pt1 = new TPaveText(0.58, 0.6, 0.88, 0.80, "brNDC");
Char_t str1[50];
sprintf(str1, "p-value for A-D 1-smp test: %f", pvalueAD_1);
pt1->AddText(str1);
pt1->SetFillColor(18);
pt1->SetTextFont(20);
pt1->SetTextColor(4);
Char_t str2[50];
sprintf(str2, "p-value for K-S 1-smp test: %f", pvalueKS_1);
pt1->AddText(str2);
pt1->Draw();
// ------------------------------------------------------------------------
// Case 2: Create Gaussian random samples
UInt_t nEvents2 = 2000;
Double_t* sample2 = new Double_t[nEvents2];
TH1D* h2smps_1 = new TH1D("h2smps_1", "Gaussian distribution histograms", 100, 0, 500);
h2smps_1->SetStats(kFALSE);
TH1D* h2smps_2 = new TH1D("h2smps_2", "Gaussian distribution histograms", 100, 0, 500);
h2smps_2->SetStats(kFALSE);
for (UInt_t i = 0; i < nEvents1; ++i) {
Double_t data = r.Gaus(300, 50);
sample1[i] = data;
h2smps_1->Fill(data);
}
h2smps_1->Scale(1. / nEvents1, "width");
c->cd(2);
h2smps_1->Draw();
h2smps_1->SetLineColor(kBlue);
for (UInt_t i = 0; i < nEvents2; ++i) {
Double_t data = r.Gaus(300, 50);
sample2[i] = data;
h2smps_2->Fill(data);
}
h2smps_2->Scale(1. / nEvents2, "width");
h2smps_2->Draw("SAME");
h2smps_2->SetLineColor(kRed);
// -----------------------------------------
// Create GoFTest object
ROOT::Math::GoFTest* goftest_2 = new ROOT::Math::GoFTest(nEvents1, sample1, nEvents2, sample2);
//----------------------------------------------------
// Possible calls for the Anderson - DarlingTest test
// a) Returning the Anderson-Darling standardized test statistic
A2_1 = goftest_2->AndersonDarling2SamplesTest("t");
A2_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s, "t");
assert(A2_1 == A2_2);
// b) Returning the p-value for the Anderson-Darling test statistic
pvalueAD_1 = goftest_2-> AndersonDarling2SamplesTest(); // p-value is the default choice
pvalueAD_2 = (*goftest_2)(ROOT::Math::GoFTest::kAD2s); // p-value is the default choices
assert(pvalueAD_1 == pvalueAD_2);
//--------------------------------------------------
// Possible calls for the Kolmogorov - Smirnov test
// a) Returning the Kolmogorov-Smirnov standardized test statistic
Dn_1 = goftest_2-> KolmogorovSmirnov2SamplesTest("t");
Dn_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s, "t");
assert(Dn_1 == Dn_2);
// b) Returning the p-value for the Kolmogorov-Smirnov test statistic
pvalueKS_1 = goftest_2-> KolmogorovSmirnov2SamplesTest();
pvalueKS_2 = (*goftest_2)(ROOT::Math::GoFTest::kKS2s);
assert(pvalueKS_1 == pvalueKS_2);
#ifdef TEST_ERROR_MESSAGE
/* Valid but incorrect calls for the 1-sample methods of the 2-samples constucted goftest_2 */
A2 = (*goftest_2)(ROOT::Math::GoFTest::kAD, "t"); // Issues error message
pvalueKS = (*goftest_2)(ROOT::Math::GoFTest::kKS); // Issues error message
assert(A2 == pvalueKS);
#endif
TPaveText* pt2 = new TPaveText(0.13, 0.6, 0.43, 0.8, "brNDC");
sprintf(str1, "p-value for A-D 2-smps test: %f", pvalueAD_1);
pt2->AddText(str1);
pt2->SetFillColor(18);
pt2->SetTextFont(20);
pt2->SetTextColor(4);
sprintf(str2, "p-value for K-S 2-smps test: %f", pvalueKS_1);
pt2-> AddText(str2);
pt2-> Draw();
// ------------------------------------------------------------------------
// Case 3: Create Landau random sample
UInt_t nEvents3 = 1000;
Double_t* sample3 = new Double_t[nEvents3];
for (UInt_t i = 0; i < nEvents3; ++i) {
Double_t data = r.Landau();
sample3[i] = data;
}
// ------------------------------------------
// Create GoFTest objects
//
// Possible constructors for the user input distribution
// a) User input PDF
double minimum = 3*TMath::MinElement(nEvents3, sample3);
double maximum = 3*TMath::MaxElement(nEvents3, sample3);
ROOT::Math::GoFTest* goftest_3a = new ROOT::Math::GoFTest(nEvents3, sample3, f, ROOT::Math::GoFTest::kPDF, minimum,maximum); // need to specify am interval
// b) User input CDF
ROOT::Math::GoFTest* goftest_3b = new ROOT::Math::GoFTest(nEvents3, sample3, fI, ROOT::Math::GoFTest::kCDF,minimum,maximum);
// Returning the p-value for the Anderson-Darling test statistic
pvalueAD_1 = goftest_3a-> AndersonDarlingTest(); // p-value is the default choice
pvalueAD_2 = (*goftest_3b)(); // p-value and Anderson - Darling Test are the default choices
// Checking consistency between both tests
std::cout << " \n\nTEST with LANDAU distribution:\t";
if (TMath::Abs(pvalueAD_1 - pvalueAD_2) > 1.E-1 * pvalueAD_2) {
std::cout << "FAILED " << std::endl;
Error("goftest","Error in comparing testing using Landau and Landau CDF");
std::cerr << " pvalues are " << pvalueAD_1 << " " << pvalueAD_2 << std::endl;
}
else
std::cout << "OK ( pvalues = " << pvalueAD_2 << " )" << std::endl;
}
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
char Char_t
Definition: RtypesCore.h:31
unsigned int UInt_t
Definition: RtypesCore.h:44
const Bool_t kFALSE
Definition: RtypesCore.h:90
double Double_t
Definition: RtypesCore.h:57
@ kRed
Definition: Rtypes.h:64
@ kBlue
Definition: Rtypes.h:64
void Error(const char *location, const char *msgfmt,...)
R__EXTERN TRandom * gRandom
Definition: TRandom.h:62
Functor1D class for one-dimensional functions.
Definition: Functor.h:493
void SetDistribution(EDistribution dist)
Definition: GoFTest.cxx:123
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Definition: GoFTest.cxx:646
virtual void SetFillColor(Color_t fcolor)
Set the fill area color.
Definition: TAttFill.h:37
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40
virtual void SetTextColor(Color_t tcolor=1)
Set the text color.
Definition: TAttText.h:43
virtual void SetTextFont(Font_t tfont=62)
Set the text font.
Definition: TAttText.h:45
The Canvas class.
Definition: TCanvas.h:27
1-Dim function class
Definition: TF1.h:210
virtual void SetNpx(Int_t npx=100)
Set the number of points used to draw the function.
Definition: TF1.cxx:3435
virtual void Draw(Option_t *option="")
Draw this function with its current attributes.
Definition: TF1.cxx:1320
virtual void SetParameters(const Double_t *params)
Definition: TF1.h:638
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:614
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3275
virtual void Draw(Option_t *option="")
Draw this histogram with options.
Definition: TH1.cxx:2998
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:6246
virtual void SetStats(Bool_t stats=kTRUE)
Set statistics option on/off.
Definition: TH1.cxx:8446
The most important graphics class in the ROOT system.
Definition: TPad.h:29
virtual void SetLogy(Int_t value=1)
Set Lin/Log scale for Y.
Definition: TPad.cxx:5904
A Pave (see TPave) with text, lines or/and boxes inside.
Definition: TPaveText.h:21
virtual TText * AddText(Double_t x1, Double_t y1, const char *label)
Add a new Text line to this pavetext at given coordinates.
Definition: TPaveText.cxx:182
virtual void Draw(Option_t *option="")
Draw this pavetext with its current attributes.
Definition: TPaveText.cxx:233
Random number generator class based on the Mersenne Twister generator of M. Matsumoto and T. Nishimura.
Definition: TRandom3.h:27
virtual Double_t Gaus(Double_t mean=0, Double_t sigma=1)
Samples a random number from the standard Normal (Gaussian) Distribution with the given mean and sigm...
Definition: TRandom.cxx:263
double landau_pdf(double x, double xi=1, double x0=0)
Probability density function of the Landau distribution:
double lognormal_cdf(double x, double m, double s, double x0=0)
Cumulative distribution function of the lognormal distribution (lower tail).
Double_t x[n]
Definition: legend1.C:17
TF1 * f1
Definition: legend1.C:11
Double_t Exp(Double_t x)
Definition: TMath.h:717
T MinElement(Long64_t n, const T *a)
Return minimum of array a of length n.
Definition: TMath.h:942
T MaxElement(Long64_t n, const T *a)
Return maximum of array a of length n.
Definition: TMath.h:949
Double_t LandauI(Double_t x)
Returns the value of the Landau distribution function at point x.
Definition: TMath.cxx:2796
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
th1 Draw()
Author
Bartolomeu Rabacal

Definition in file goftest.C.