47 virtual ~CDFWrapper() {
if (fCDF)
delete fCDF; }
54 fXmin = -std::numeric_limits<double>::infinity();
55 fXmax = std::numeric_limits<double>::infinity();
65 if (x <= fXmin)
return 0;
66 if (x >= fXmax)
return 1.0;
67 return (*fCDF)(
x)/fNorm;
71 return new CDFWrapper(*fCDF,fXmin,fXmax);
80 mutable IntegratorOneDim fIntegral;
84 virtual ~PDFIntegral() {
if (fPDF)
delete fPDF; }
93 fIntegral.SetFunction(*fPDF);
95 fXmin = -std::numeric_limits<double>::infinity();
96 fXmax = std::numeric_limits<double>::infinity();
98 if (fXmin == -std::numeric_limits<double>::infinity() && fXmax == std::numeric_limits<double>::infinity() ) {
99 fNorm = fIntegral.Integral();
101 else if (fXmin == -std::numeric_limits<double>::infinity() )
102 fNorm = fIntegral.IntegralLow(fXmax);
103 else if (fXmax == std::numeric_limits<double>::infinity() )
104 fNorm = fIntegral.IntegralUp(fXmin);
106 fNorm = fIntegral.Integral(fXmin, fXmax);
110 if (x <= fXmin)
return 0;
111 if (x >= fXmax)
return 1.0;
112 if (fXmin == -std::numeric_limits<double>::infinity() )
113 return fIntegral.IntegralLow(x)/fNorm;
115 return fIntegral.Integral(fXmin,x)/fNorm;
119 return new PDFIntegral(*fPDF, fXmin, fXmax);
124 if (!(kGaussian <= dist && dist <= kExponential)) {
125 MATH_ERROR_MSG(
"SetDistribution",
"Cannot set distribution type! Distribution type option must be ennabled.");
135 fTestSampleFromH0(
kFALSE) {
136 Bool_t badSampleArg = sample1 == 0 || sample1Size == 0;
138 std::string msg =
"'sample1";
139 msg += !sample1Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
141 assert(!badSampleArg);
143 badSampleArg = sample2 == 0 || sample2Size == 0;
145 std::string msg =
"'sample2";
146 msg += !sample2Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
148 assert(!badSampleArg);
150 std::vector<const Double_t*> samples(2);
151 std::vector<UInt_t> samplesSizes(2);
152 samples[0] = sample1;
153 samples[1] = sample2;
154 samplesSizes[0] = sample1Size;
155 samplesSizes[1] = sample2Size;
164 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
166 std::string msg =
"'sample";
167 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
169 assert(!badSampleArg);
171 std::vector<const Double_t*> samples(1, sample);
172 std::vector<UInt_t> samplesSizes(1, sampleSize);
181 fCombinedSamples.assign(std::accumulate(samplesSizes.begin(), samplesSizes.end(), 0), 0.0);
182 UInt_t combinedSamplesSize = 0;
183 for (
UInt_t i = 0; i < samples.size(); ++i) {
184 fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
186 for (
UInt_t j = 0; j < samplesSizes[i]; ++j) {
189 combinedSamplesSize += samplesSizes[i];
194 if (degenerateSamples) {
195 std::string msg =
"Degenerate sample";
196 msg += samplesSizes.size() > 1 ?
"s!" :
"!";
197 msg +=
" Sampling values all identical.";
199 assert(!degenerateSamples);
266 MATH_WARN_MSG(
"SetDistributionFunction",
"Distribution type is changed to user defined");
271 fCDF.reset(
new PDFIntegral(f, xmin, xmax) );
273 fCDF.reset(
new CDFWrapper(f, xmin, xmax) );
278 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
280 std::string msg =
"'sample";
281 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
283 assert(!badSampleArg);
289 fSamples = std::vector<std::vector<Double_t> >(1);
291 SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
314 Double_t sigmaN = 0.0,
h = 0.0,
H = 0.0,
g = 0.0,
a,
b, c, d, k = ns.size();
316 for (
UInt_t i = 0; i < ns.size(); ++i) {
317 H += 1.0 / double( ns[i] );
323 std::vector<double> invI(N);
324 for (
UInt_t i = 1; i <= N - 1; ++i) {
328 for (
UInt_t i = 1; i <= N - 2; ++i) {
329 double tmp = invI[N-i];
330 for (
UInt_t j = i + 1; j <= N - 1; ++j) {
337 const double emc = 0.5772156649015328606065120900824024;
342 a = (4 *
g - 6) * k + (10 - 6 *
g) *
H - 4 *
g + 6;
343 b = (2 * g - 4) * k2 + 8 *
h * k + (2 * g - 14 *
h - 4) *
H - 8 *
h + 4 * g - 6;
344 c = (6 *
h + 2 * g - 2) * k2 + (4 *
h - 4 *g + 6) * k + (2 *
h - 6) *
H + 4 *
h;
345 d = (2 *
h + 6) * k2 - 4 *
h * k;
347 sigmaN /= ( double(N - 1) * double(N - 2) * double(N - 3) );
382 double ts[ ] = { -1.1954, -1.5806, -1.8172,
383 -2.0032, -2.2526, -2.4204, -2.5283, -4.2649, -1.1786, -1.5394,
384 -1.7728, -1.9426, -2.1685, -2.3288, -2.4374, -3.8906, -1.166,
385 -1.5193, -1.7462, -1.9067, -2.126, -2.2818, -2.3926, -3.719,
386 -1.1407, -1.4659, -1.671, -1.8105, -2.0048, -2.1356, -2.2348,
387 -3.2905, -1.1253, -1.4371, -1.6314, -1.7619, -1.9396, -2.0637,
388 -2.1521, -3.0902, -1.0777, -1.3503, -1.5102, -1.6177, -1.761,
389 -1.8537, -1.9178, -2.5758, -1.0489, -1.2984, -1.4415, -1.5355,
390 -1.6625, -1.738, -1.7936, -2.3263, -0.9978, -1.2098, -1.3251,
391 -1.4007, -1.4977, -1.5555, -1.5941, -1.96, -0.9417, -1.1187,
392 -1.209, -1.2671, -1.3382, -1.379, -1.405, -1.6449, -0.8981, -1.0491,
393 -1.1235, -1.1692, -1.2249, -1.2552, -1.2755, -1.4395, -0.8598,
394 -0.9904, -1.0513, -1.0879, -1.1317, -1.155, -1.1694, -1.2816,
395 -0.7258, -0.7938, -0.8188, -0.8312, -0.8435, -0.8471, -0.8496,
396 -0.8416, -0.5966, -0.617, -0.6177, -0.6139, -0.6073, -0.5987,
397 -0.5941, -0.5244, -0.4572, -0.4383, -0.419, -0.4033, -0.3834,
398 -0.3676, -0.3587, -0.2533, -0.2966, -0.2428, -0.2078, -0.1844,
399 -0.1548, -0.1346, -0.1224, 0, -0.1009, -0.0169, 0.0304, 0.0596,
400 0.0933, 0.1156, 0.1294, 0.2533, 0.1571, 0.2635, 0.3169, 0.348,
401 0.3823, 0.4038, 0.4166, 0.5244, 0.5357, 0.6496, 0.6992, 0.7246,
402 0.7528, 0.7683, 0.7771, 0.8416, 1.2255, 1.2989, 1.3202, 1.3254,
403 1.3305, 1.3286, 1.3257, 1.2816, 1.5262, 1.5677, 1.5709, 1.5663,
404 1.5561, 1.5449, 1.5356, 1.4395, 1.9633, 1.943, 1.919, 1.8975,
405 1.8641, 1.8389, 1.8212, 1.6449, 2.7314, 2.5899, 2.5, 2.4451,
406 2.3664, 2.3155, 2.2823, 1.96, 3.7825, 3.4425, 3.2582, 3.1423,
407 3.0036, 2.9101, 2.8579, 2.3263, 4.1241, 3.716, 3.4984, 3.3651,
408 3.2003, 3.0928, 3.0311, 2.4324, 4.6044, 4.0847, 3.8348, 3.6714,
409 3.4721, 3.3453, 3.2777, 2.5758, 5.409, 4.7223, 4.4022, 4.1791,
410 3.9357, 3.7809, 3.6963, 2.807, 6.4954, 5.5823, 5.1456, 4.8657,
411 4.5506, 4.3275, 4.2228, 3.0902, 6.8279, 5.8282, 5.3658, 5.0749,
412 4.7318, 4.4923, 4.3642, 3.1747, 7.2755, 6.197, 5.6715, 5.3642,
413 4.9991, 4.7135, 4.5945, 3.2905, 8.1885, 6.8537, 6.2077, 5.8499,
414 5.4246, 5.1137, 4.9555, 3.4808, 9.3061, 7.6592, 6.85, 6.4806,
415 5.9919, 5.6122, 5.5136, 3.719, 9.6132, 7.9234, 7.1025, 6.6731,
416 6.1549, 5.8217, 5.7345, 3.7911, 10.0989, 8.2395, 7.4326, 6.9567,
417 6.3908, 6.011, 5.9566, 3.8906, 10.8825, 8.8994, 7.8934, 7.4501,
418 6.9009, 6.4538, 6.2705, 4.0556, 11.8537, 9.5482, 8.5568, 8.0283,
419 7.4418, 6.9524, 6.6195, 4.2649 };
426 double p[] = { .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,
427 .925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999 };
430 const int nbins = 35;
437 MATH_ERROR_MSG(
"InterpolatePValues",
"Interpolation not implemented for nsamples not equal to 2");
440 std::vector<double> ts2(nbins);
441 std::vector<double> lp(nbins);
442 for (
int i = 0; i < nbins; ++i)
444 ts2[i] = ts[offset+ i *
ns];
446 lp[i] =
std::log( p[i]/(1.-p[i] ) );
450 int i1 = std::distance(ts2.begin(), std::lower_bound(ts2.begin(), ts2.end(), tx ) ) - 1;
458 if (i2 >=
int(ts2.size()) ) {
464 assert(i1 < (
int) lp.size() && i2 < (int) lp.size() );
467 double tx1 = ts2[i1];
468 double tx2 = ts2[i2];
472 double lp0 = (lp1-lp2) * (tx - tx2)/ ( tx1-tx2) + lp2;
475 double p0 =
exp(lp0)/(1. +
exp(lp0) );
487 }
else if (A2 < 2.) {
488 pvalue =
std::pow(A2, -0.5) *
std::exp(-1.2337141 / A2) * (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
490 pvalue =
std::exp(-1. *
std::exp(1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2));
524 for (i = 0; i <
n; i++) {
538 for (i = 0; i <
n; i++) {
546 void adkTestStat(
double *adk,
const std::vector<std::vector<double> > & samples,
const std::vector<double> & zstar) {
552 int k = samples.size();
553 int l = zstar.size();
557 std::vector<int> fij (k*l);
561 std::vector<int> lvec(l);
579 std::vector<int>
ns(k);
581 for (i = 0; i < k; i++) {
582 ns[i] = samples[i].size();
590 for (j = 0; j <
l; j++) {
592 for (i = 0; i < k; i++) {
593 fij[i + j*k] =
getCount(zstar[j], &samples[i][0], ns[i]);
594 lvec[j] += fij[i + j*k];
601 for (i = 0; i < k; i++) {
607 for (j = 0; j <
l; j++) {
609 maij = mij - (double) fij[i + j*k] / 2.0;
610 bj =
getSum(&lvec[0], j + 1);
611 baj = bj - (double) lvec[j] / 2.0;
614 tmp = (double) nsum * mij - (
double) ns[i] * bj;
615 innerSum = innerSum + (double) lvec[j] * tmp * tmp /
616 (bj * ((
double) nsum - bj));
619 tmp = (double) nsum * maij - (
double) ns[i] * baj;
620 aInnerSum = aInnerSum + (double) lvec[j] * tmp * tmp /
621 (baj * (nsum - baj) - nsum * (double) lvec[j] / 4.0);
624 adk[0] = adk[0] + innerSum / ns[i];
625 adk[1] = adk[1] + aInnerSum / ns[i];
631 adk[0] = adk[0] / (double) nsum;
632 adk[1] = (nsum - 1) * adk[1] / ((
double) nsum * (double) nsum);
650 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
656 std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end());
657 z.erase(endUnique, z.end() );
658 std::vector<UInt_t>
h;
659 std::vector<Double_t>
H;
667 unsigned int nSamples =
fSamples.size();
670 for (std::vector<Double_t>::iterator
data = z.begin();
data != endUnique; ++
data) {
674 std::bind(std::less<Double_t>(), std::placeholders::_1, *
data)) + n / 2.);
676 std::cout <<
"time for H";
679 std::vector<std::vector<Double_t> >
F(nSamples);
680 for (
UInt_t i = 0; i < nSamples; ++i) {
681 for (std::vector<Double_t>::iterator
data = z.begin();
data != endUnique; ++
data) {
684 std::bind(std::less<Double_t>(), std::placeholders::_1, *
data)) + n / 2.);
687 std::cout <<
"time for F";
689 for (
UInt_t i = 0; i < nSamples; ++i) {
693 for (std::vector<Double_t>::iterator
data = z.begin();
data != endUnique; ++
data) {
694 sum_result += h[j] *
TMath::Power(N * F[i][j]-
fSamples[i].size() * H[j], 2) / (H[j] * (N - H[j]) - N * h[j] / 4.0);
697 std::cout <<
"time for sum_resut";
699 std::cout <<
"sum_result " << sum_result << std::endl;
700 A2 += 1.0 /
fSamples[i].size() * sum_result;
704 std::cout <<
"A2 - old Bartolomeo code " << A2 << std::endl;
709 double adk[2] = {0,0};
732 for (
unsigned int k = 0; k <
ns.size(); ++k)
ns[k] =
fSamples[k].size();
759 if (data1.
NDim() != 1 && data2.
NDim() != 1) {
760 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Bin Data set must be one-dimensional ");
763 unsigned int n1 = data1.
Size();
764 unsigned int n2 = data2.
Size();
770 std::vector<double> xdata(n1+n2);
771 for (
unsigned int i = 0; i < n1; ++i) {
773 const double * x = data1.
GetPoint(i, value);
777 for (
unsigned int i = 0; i < n2; ++i) {
779 const double * x = data2.
GetPoint(i, value);
783 double nall = ntot1+ntot2;
785 std::vector<unsigned int> index(n1+n2);
798 double x = xdata[ index[j] ];
803 unsigned int i = index[k];
806 value = data1.
Value(i);
813 value = data2.
Value(i);
820 }
while ( k < n1+n2 && xdata[ index[k] ] == x );
826 double tmp1 = ( nall * sum1 - ntot1 * sumAll );
827 double tmp2 = ( nall * sum2 - ntot2 * sumAll );
828 adsum += t * (tmp1*tmp1/ntot1 + tmp2*tmp2/ntot2) / ( sumAll * (nall - sumAll) ) ;
833 double A2 = adsum / nall;
836 std::vector<unsigned int>
ns(2);
857 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
866 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
870 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
875 for (
Int_t i = 0; i <
n ; ++i) {
883 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
893 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
900 MATH_ERROR_MSG(
"KolmogorovSmirnov2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
905 std::vector<Double_t>
a(na);
906 std::vector<Double_t>
b(nb);
916 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
925 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
929 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
934 for (
UInt_t i = 0; i <
n; ++i) {
938 if (result > Dn) Dn = result;
948 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
void SetDistribution(EDistribution dist)
double dist(Rotation3D const &r1, Rotation3D const &r2)
static long int sum(long int i)
int getCount(double z, const double *dat, int n)
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
Namespace for new ROOT classes and functions.
void SetSamples(std::vector< const Double_t *> samples, const std::vector< UInt_t > samplesSizes)
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
Double_t KolmogorovProb(Double_t z)
Calculates the Kolmogorov distribution function,.
std::vector< Double_t > fCombinedSamples
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
double inner_product(const LAVector &, const LAVector &)
#define MATH_WARN_MSG(loc, str)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
const double * GetPoint(unsigned int ipoint, double &value) const
retrieve at the same time a pointer to the coordinate data and the fit value More efficient than call...
Double_t GaussianCDF(Double_t x) const
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
double pow(double, double)
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
unsigned int Size() const
return number of fit points
#define MATH_ERROR_MSG(loc, str)
int getSum(const int *x, int n)
IBaseFunctionOneDim IGenFunction
Double_t KolmogorovTest(Int_t na, const Double_t *a, Int_t nb, const Double_t *b, Option_t *option)
Statistical test whether two one-dimensional sets of points are compatible with coming from the same ...
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
std::vector< std::vector< Double_t > > fSamples
static const double x1[5]
void Instantiate(const Double_t *sample, UInt_t sampleSize)
double Value(unsigned int ipoint) const
return the value for the given fit point
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
Namespace for new Math classes and functions.
Double_t PValueAD1Sample(Double_t A2) const
you should not use this method at all Int_t Int_t z
Double_t ExponentialCDF(Double_t x) const
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
unsigned int NDim() const
return coordinate data dimension
Double_t Sqrt(Double_t x)
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
static constexpr double ns
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
void adkTestStat(double *adk, const std::vector< std::vector< double > > &samples, const std::vector< double > &zstar)
double exponential_cdf(double x, double lambda, double x0=0)
Cumulative distribution function of the exponential distribution (lower tail).
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
static constexpr double g