47 virtual ~CDFWrapper() {
if (fCDF)
delete fCDF; }
54 fXmin = -std::numeric_limits<double>::infinity();
55 fXmax = std::numeric_limits<double>::infinity();
65 if (x <= fXmin)
return 0;
66 if (x >= fXmax)
return 1.0;
67 return (*fCDF)(
x)/fNorm;
71 return new CDFWrapper(*fCDF,fXmin,fXmax);
80 mutable IntegratorOneDim fIntegral;
84 virtual ~PDFIntegral() {
if (fPDF)
delete fPDF; }
93 fIntegral.SetFunction(*fPDF);
95 fXmin = -std::numeric_limits<double>::infinity();
96 fXmax = std::numeric_limits<double>::infinity();
98 if (fXmin == -std::numeric_limits<double>::infinity() && fXmax == std::numeric_limits<double>::infinity() ) {
99 fNorm = fIntegral.Integral();
101 else if (fXmin == -std::numeric_limits<double>::infinity() )
102 fNorm = fIntegral.IntegralLow(fXmax);
103 else if (fXmax == std::numeric_limits<double>::infinity() )
104 fNorm = fIntegral.IntegralUp(fXmin);
106 fNorm = fIntegral.Integral(fXmin, fXmax);
110 if (x <= fXmin)
return 0;
111 if (x >= fXmax)
return 1.0;
112 if (fXmin == -std::numeric_limits<double>::infinity() )
113 return fIntegral.IntegralLow(x)/fNorm;
115 return fIntegral.Integral(fXmin,x)/fNorm;
119 return new PDFIntegral(*fPDF, fXmin, fXmax);
124 if (!(kGaussian <= dist && dist <= kExponential)) {
125 MATH_ERROR_MSG(
"SetDistribution",
"Cannot set distribution type! Distribution type option must be ennabled.");
135 fTestSampleFromH0(
kFALSE) {
136 Bool_t badSampleArg = sample1 == 0 || sample1Size == 0;
138 std::string msg =
"'sample1";
139 msg += !sample1Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
141 assert(!badSampleArg);
143 badSampleArg = sample2 == 0 || sample2Size == 0;
145 std::string msg =
"'sample2";
146 msg += !sample2Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
148 assert(!badSampleArg);
150 std::vector<const Double_t*> samples(2);
151 std::vector<UInt_t> samplesSizes(2);
152 samples[0] = sample1;
153 samples[1] = sample2;
154 samplesSizes[0] = sample1Size;
155 samplesSizes[1] = sample2Size;
164 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
166 std::string msg =
"'sample";
167 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
169 assert(!badSampleArg);
171 std::vector<const Double_t*> samples(1, sample);
172 std::vector<UInt_t> samplesSizes(1, sampleSize);
181 fCombinedSamples.assign(std::accumulate(samplesSizes.begin(), samplesSizes.end(), 0), 0.0);
182 UInt_t combinedSamplesSize = 0;
183 for (
UInt_t i = 0; i < samples.size(); ++i) {
184 fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
186 for (
UInt_t j = 0; j < samplesSizes[i]; ++j) {
189 combinedSamplesSize += samplesSizes[i];
194 if (degenerateSamples) {
195 std::string msg =
"Degenerate sample";
196 msg += samplesSizes.size() > 1 ?
"s!" :
"!";
197 msg +=
" Sampling values all identical.";
199 assert(!degenerateSamples);
265 MATH_WARN_MSG(
"SetDistributionFunction",
"Distribution type is changed to user defined");
270 fCDF.reset(
new PDFIntegral(f, xmin, xmax) );
272 fCDF.reset(
new CDFWrapper(f, xmin, xmax) );
277 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
279 std::string msg =
"'sample";
280 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
282 assert(!badSampleArg);
288 fSamples = std::vector<std::vector<Double_t> >(1);
290 SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
312 Double_t sigmaN = 0.0,
h = 0.0,
H = 0.0, g = 0.0,
a,
b, c, d, k = ns.size();
314 for (
UInt_t i = 0; i < ns.size(); ++i) {
315 H += 1.0 / double( ns[i] );
321 std::vector<double> invI(N);
322 for (
UInt_t i = 1; i <= N - 1; ++i) {
326 for (
UInt_t i = 1; i <= N - 2; ++i) {
327 double tmp = invI[N-i];
328 for (
UInt_t j = i + 1; j <= N - 1; ++j) {
335 const double emc = 0.5772156649015328606065120900824024;
340 a = (4 * g - 6) * k + (10 - 6 * g) *
H - 4 * g + 6;
341 b = (2 * g - 4) * k2 + 8 *
h * k + (2 * g - 14 *
h - 4) *
H - 8 *
h + 4 * g - 6;
342 c = (6 *
h + 2 * g - 2) * k2 + (4 *
h - 4 *g + 6) * k + (2 *
h - 6) *
H + 4 *
h;
343 d = (2 *
h + 6) * k2 - 4 *
h * k;
345 sigmaN /= ( double(N - 1) * double(N - 2) * double(N - 3) );
380 double ts[ ] = { -1.1954, -1.5806, -1.8172,
381 -2.0032, -2.2526, -2.4204, -2.5283, -4.2649, -1.1786, -1.5394,
382 -1.7728, -1.9426, -2.1685, -2.3288, -2.4374, -3.8906, -1.166,
383 -1.5193, -1.7462, -1.9067, -2.126, -2.2818, -2.3926, -3.719,
384 -1.1407, -1.4659, -1.671, -1.8105, -2.0048, -2.1356, -2.2348,
385 -3.2905, -1.1253, -1.4371, -1.6314, -1.7619, -1.9396, -2.0637,
386 -2.1521, -3.0902, -1.0777, -1.3503, -1.5102, -1.6177, -1.761,
387 -1.8537, -1.9178, -2.5758, -1.0489, -1.2984, -1.4415, -1.5355,
388 -1.6625, -1.738, -1.7936, -2.3263, -0.9978, -1.2098, -1.3251,
389 -1.4007, -1.4977, -1.5555, -1.5941, -1.96, -0.9417, -1.1187,
390 -1.209, -1.2671, -1.3382, -1.379, -1.405, -1.6449, -0.8981, -1.0491,
391 -1.1235, -1.1692, -1.2249, -1.2552, -1.2755, -1.4395, -0.8598,
392 -0.9904, -1.0513, -1.0879, -1.1317, -1.155, -1.1694, -1.2816,
393 -0.7258, -0.7938, -0.8188, -0.8312, -0.8435, -0.8471, -0.8496,
394 -0.8416, -0.5966, -0.617, -0.6177, -0.6139, -0.6073, -0.5987,
395 -0.5941, -0.5244, -0.4572, -0.4383, -0.419, -0.4033, -0.3834,
396 -0.3676, -0.3587, -0.2533, -0.2966, -0.2428, -0.2078, -0.1844,
397 -0.1548, -0.1346, -0.1224, 0, -0.1009, -0.0169, 0.0304, 0.0596,
398 0.0933, 0.1156, 0.1294, 0.2533, 0.1571, 0.2635, 0.3169, 0.348,
399 0.3823, 0.4038, 0.4166, 0.5244, 0.5357, 0.6496, 0.6992, 0.7246,
400 0.7528, 0.7683, 0.7771, 0.8416, 1.2255, 1.2989, 1.3202, 1.3254,
401 1.3305, 1.3286, 1.3257, 1.2816, 1.5262, 1.5677, 1.5709, 1.5663,
402 1.5561, 1.5449, 1.5356, 1.4395, 1.9633, 1.943, 1.919, 1.8975,
403 1.8641, 1.8389, 1.8212, 1.6449, 2.7314, 2.5899, 2.5, 2.4451,
404 2.3664, 2.3155, 2.2823, 1.96, 3.7825, 3.4425, 3.2582, 3.1423,
405 3.0036, 2.9101, 2.8579, 2.3263, 4.1241, 3.716, 3.4984, 3.3651,
406 3.2003, 3.0928, 3.0311, 2.4324, 4.6044, 4.0847, 3.8348, 3.6714,
407 3.4721, 3.3453, 3.2777, 2.5758, 5.409, 4.7223, 4.4022, 4.1791,
408 3.9357, 3.7809, 3.6963, 2.807, 6.4954, 5.5823, 5.1456, 4.8657,
409 4.5506, 4.3275, 4.2228, 3.0902, 6.8279, 5.8282, 5.3658, 5.0749,
410 4.7318, 4.4923, 4.3642, 3.1747, 7.2755, 6.197, 5.6715, 5.3642,
411 4.9991, 4.7135, 4.5945, 3.2905, 8.1885, 6.8537, 6.2077, 5.8499,
412 5.4246, 5.1137, 4.9555, 3.4808, 9.3061, 7.6592, 6.85, 6.4806,
413 5.9919, 5.6122, 5.5136, 3.719, 9.6132, 7.9234, 7.1025, 6.6731,
414 6.1549, 5.8217, 5.7345, 3.7911, 10.0989, 8.2395, 7.4326, 6.9567,
415 6.3908, 6.011, 5.9566, 3.8906, 10.8825, 8.8994, 7.8934, 7.4501,
416 6.9009, 6.4538, 6.2705, 4.0556, 11.8537, 9.5482, 8.5568, 8.0283,
417 7.4418, 6.9524, 6.6195, 4.2649 };
424 double p[] = { .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,
425 .925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999 };
428 const int nbins = 35;
435 MATH_ERROR_MSG(
"InterpolatePValues",
"Interpolation not implemented for nsamples not equal to 2");
438 std::vector<double> ts2(nbins);
439 std::vector<double> lp(nbins);
440 for (
int i = 0; i <
nbins; ++i)
442 ts2[i] = ts[offset+ i * ns];
444 lp[i] =
std::log( p[i]/(1.-p[i] ) );
448 int i1 = std::distance(ts2.begin(), std::lower_bound(ts2.begin(), ts2.end(), tx ) ) - 1;
456 if (i2 >=
int(ts2.size()) ) {
462 assert(i1 < (
int) lp.size() && i2 < (int) lp.size() );
465 double tx1 = ts2[i1];
466 double tx2 = ts2[i2];
470 double lp0 = (lp1-lp2) * (tx - tx2)/ ( tx1-tx2) + lp2;
473 double p0 =
exp(lp0)/(1. +
exp(lp0) );
485 }
else if (A2 < 2.) {
486 pvalue =
std::pow(A2, -0.5) *
std::exp(-1.2337141 / A2) * (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
488 pvalue =
std::exp(-1. *
std::exp(1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2));
522 for (i = 0; i <
n; i++) {
536 for (i = 0; i <
n; i++) {
544 void adkTestStat(
double *adk,
const std::vector<std::vector<double> > & samples,
const std::vector<double> & zstar) {
550 int k = samples.size();
551 int l = zstar.size();
555 std::vector<int> fij (k*l);
559 std::vector<int> lvec(l);
577 std::vector<int> ns(k);
579 for (i = 0; i < k; i++) {
580 ns[i] = samples[i].size();
588 for (j = 0; j <
l; j++) {
590 for (i = 0; i < k; i++) {
591 fij[i + j*k] =
getCount(zstar[j], &samples[i][0], ns[i]);
592 lvec[j] += fij[i + j*k];
599 for (i = 0; i < k; i++) {
605 for (j = 0; j <
l; j++) {
607 maij = mij - (double) fij[i + j*k] / 2.0;
608 bj =
getSum(&lvec[0], j + 1);
609 baj = bj - (double) lvec[j] / 2.0;
612 tmp = (double) nsum * mij - (
double) ns[i] * bj;
613 innerSum = innerSum + (double) lvec[j] * tmp * tmp /
614 (bj * ((
double) nsum - bj));
617 tmp = (double) nsum * maij - (
double) ns[i] * baj;
618 aInnerSum = aInnerSum + (double) lvec[j] * tmp * tmp /
619 (baj * (nsum - baj) - nsum * (double) lvec[j] / 4.0);
622 adk[0] = adk[0] + innerSum / ns[i];
623 adk[1] = adk[1] + aInnerSum / ns[i];
629 adk[0] = adk[0] / (double) nsum;
630 adk[1] = (nsum - 1) * adk[1] / ((
double) nsum * (double) nsum);
648 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
654 std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end());
655 z.erase(endUnique, z.end() );
656 std::vector<UInt_t>
h;
657 std::vector<Double_t>
H;
665 unsigned int nSamples =
fSamples.size();
668 for (std::vector<Double_t>::iterator
data = z.begin();
data != endUnique; ++
data) {
673 std::cout <<
"time for H";
676 std::vector<std::vector<Double_t> >
F(nSamples);
677 for (
UInt_t i = 0; i < nSamples; ++i) {
678 for (std::vector<Double_t>::iterator
data = z.begin();
data != endUnique; ++
data) {
680 F[i].push_back(std::count_if(
fSamples[i].begin(),
fSamples[i].end(), bind2nd(std::less<Double_t>(), *
data)) + n / 2.);
683 std::cout <<
"time for F";
685 for (
UInt_t i = 0; i < nSamples; ++i) {
689 for (std::vector<Double_t>::iterator
data = z.begin();
data != endUnique; ++
data) {
690 sum_result += h[j] *
TMath::Power(N * F[i][j]-
fSamples[i].size() * H[j], 2) / (H[j] * (N - H[j]) - N * h[j] / 4.0);
693 std::cout <<
"time for sum_resut";
695 std::cout <<
"sum_result " << sum_result << std::endl;
696 A2 += 1.0 /
fSamples[i].size() * sum_result;
700 std::cout <<
"A2 - old Bartolomeo code " << A2 << std::endl;
705 double adk[2] = {0,0};
727 std::vector<UInt_t> ns(
fSamples.size());
728 for (
unsigned int k = 0; k < ns.size(); ++k) ns[k] =
fSamples[k].size();
755 if (data1.
NDim() != 1 && data2.
NDim() != 1) {
756 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Bin Data set must be one-dimensional ");
759 unsigned int n1 = data1.
Size();
760 unsigned int n2 = data2.
Size();
766 std::vector<double> xdata(n1+n2);
767 for (
unsigned int i = 0; i < n1; ++i) {
769 const double * x = data1.
GetPoint(i, value);
773 for (
unsigned int i = 0; i < n2; ++i) {
775 const double * x = data2.
GetPoint(i, value);
779 double nall = ntot1+ntot2;
781 std::vector<unsigned int> index(n1+n2);
794 double x = xdata[ index[j] ];
799 unsigned int i = index[k];
802 value = data1.
Value(i);
809 value = data2.
Value(i);
816 }
while ( k < n1+n2 && xdata[ index[k] ] == x );
822 double tmp1 = ( nall * sum1 - ntot1 * sumAll );
823 double tmp2 = ( nall * sum2 - ntot2 * sumAll );
824 adsum += t * (tmp1*tmp1/ntot1 + tmp2*tmp2/ntot2) / ( sumAll * (nall - sumAll) ) ;
829 double A2 = adsum / nall;
832 std::vector<unsigned int> ns(2);
853 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
862 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
866 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
871 for (
Int_t i = 0; i <
n ; ++i) {
879 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
889 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
896 MATH_ERROR_MSG(
"KolmogorovSmirnov2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
901 std::vector<Double_t>
a(na);
902 std::vector<Double_t>
b(nb);
912 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
921 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
925 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
930 for (
UInt_t i = 0; i <
n; ++i) {
934 if (result > Dn) Dn =
result;
944 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
void SetDistribution(EDistribution dist)
double dist(Rotation3D const &r1, Rotation3D const &r2)
static long int sum(long int i)
int getCount(double z, const double *dat, int n)
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
Namespace for new ROOT classes and functions.
void SetSamples(std::vector< const Double_t *> samples, const std::vector< UInt_t > samplesSizes)
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
Double_t KolmogorovProb(Double_t z)
Calculates the Kolmogorov distribution function,.
std::vector< Double_t > fCombinedSamples
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
double inner_product(const LAVector &, const LAVector &)
#define MATH_WARN_MSG(loc, str)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
const double * GetPoint(unsigned int ipoint, double &value) const
retrieve at the same time a pointer to the coordinate data and the fit value More efficient than call...
Double_t GaussianCDF(Double_t x) const
double cdf(double *x, double *p)
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
double pow(double, double)
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
unsigned int Size() const
return number of fit points
#define MATH_ERROR_MSG(loc, str)
int getSum(const int *x, int n)
IBaseFunctionOneDim IGenFunction
Double_t KolmogorovTest(Int_t na, const Double_t *a, Int_t nb, const Double_t *b, Option_t *option)
Statistical test whether two one-dimensional sets of points are compatible with coming from the same ...
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
std::vector< std::vector< Double_t > > fSamples
static const double x1[5]
void Instantiate(const Double_t *sample, UInt_t sampleSize)
double Value(unsigned int ipoint) const
return the value for the given fit point
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Namespace for new Math classes and functions.
Double_t PValueAD1Sample(Double_t A2) const
you should not use this method at all Int_t Int_t z
Double_t ExponentialCDF(Double_t x) const
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
unsigned int NDim() const
return coordinate data dimension
Double_t Sqrt(Double_t x)
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
void adkTestStat(double *adk, const std::vector< std::vector< double > > &samples, const std::vector< double > &zstar)
double exponential_cdf(double x, double lambda, double x0=0)
Cumulative distribution function of the exponential distribution (lower tail).
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const