47 virtual ~CDFWrapper() {
if (fCDF)
delete fCDF; }
54 fXmin = -std::numeric_limits<double>::infinity();
55 fXmax = std::numeric_limits<double>::infinity();
65 if (
x <= fXmin)
return 0;
66 if (
x >= fXmax)
return 1.0;
67 return (*fCDF)(
x)/fNorm;
71 return new CDFWrapper(*fCDF,fXmin,fXmax);
80 mutable IntegratorOneDim fIntegral;
84 virtual ~PDFIntegral() {
if (fPDF)
delete fPDF; }
93 fIntegral.SetFunction(*fPDF);
95 fXmin = -std::numeric_limits<double>::infinity();
96 fXmax = std::numeric_limits<double>::infinity();
98 if (fXmin == -std::numeric_limits<double>::infinity() && fXmax == std::numeric_limits<double>::infinity() ) {
99 fNorm = fIntegral.Integral();
101 else if (fXmin == -std::numeric_limits<double>::infinity() )
102 fNorm = fIntegral.IntegralLow(fXmax);
103 else if (fXmax == std::numeric_limits<double>::infinity() )
104 fNorm = fIntegral.IntegralUp(fXmin);
106 fNorm = fIntegral.Integral(fXmin, fXmax);
110 if (
x <= fXmin)
return 0;
111 if (
x >= fXmax)
return 1.0;
112 if (fXmin == -std::numeric_limits<double>::infinity() )
113 return fIntegral.IntegralLow(
x)/fNorm;
115 return fIntegral.Integral(fXmin,
x)/fNorm;
119 return new PDFIntegral(*fPDF, fXmin, fXmax);
125 MATH_ERROR_MSG(
"SetDistribution",
"Cannot set distribution type! Distribution type option must be ennabled.");
134 fSamples(std::vector<std::vector<
Double_t> >(2)),
135 fTestSampleFromH0(
kFALSE) {
136 Bool_t badSampleArg = sample1 == 0 || sample1Size == 0;
138 std::string msg =
"'sample1";
139 msg += !sample1Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
141 assert(!badSampleArg);
143 badSampleArg = sample2 == 0 || sample2Size == 0;
145 std::string msg =
"'sample2";
146 msg += !sample2Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
148 assert(!badSampleArg);
150 std::vector<const Double_t*> samples(2);
151 std::vector<UInt_t> samplesSizes(2);
152 samples[0] = sample1;
153 samples[1] = sample2;
154 samplesSizes[0] = sample1Size;
155 samplesSizes[1] = sample2Size;
162 fSamples(std::vector<std::vector<
Double_t> >(1)),
163 fTestSampleFromH0(
kTRUE) {
164 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
166 std::string msg =
"'sample";
167 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
169 assert(!badSampleArg);
171 std::vector<const Double_t*> samples(1, sample);
172 std::vector<UInt_t> samplesSizes(1, sampleSize);
181 fCombinedSamples.assign(std::accumulate(samplesSizes.begin(), samplesSizes.end(), 0u), 0.0);
182 UInt_t combinedSamplesSize = 0;
183 for (
UInt_t i = 0; i < samples.size(); ++i) {
184 fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
186 for (
UInt_t j = 0; j < samplesSizes[i]; ++j) {
189 combinedSamplesSize += samplesSizes[i];
194 if (degenerateSamples) {
195 std::string msg =
"Degenerate sample";
196 msg += samplesSizes.size() > 1 ?
"s!" :
"!";
197 msg +=
" Sampling values all identical.";
199 assert(!degenerateSamples);
266 MATH_WARN_MSG(
"SetDistributionFunction",
"Distribution type is changed to user defined");
278 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
280 std::string msg =
"'sample";
281 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
283 assert(!badSampleArg);
289 fSamples = std::vector<std::vector<Double_t> >(1);
291 SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
316 for (
UInt_t i = 0; i <
ns.size(); ++i) {
323 std::vector<double> invI(
N);
324 for (
UInt_t i = 1; i <=
N - 1; ++i) {
328 for (
UInt_t i = 1; i <=
N - 2; ++i) {
329 double tmp = invI[
N-i];
330 for (
UInt_t j = i + 1; j <=
N - 1; ++j) {
337 const double emc = 0.5772156649015328606065120900824024;
342 a = (4 *
g - 6) * k + (10 - 6 *
g) *
H - 4 *
g + 6;
343 b = (2 *
g - 4) * k2 + 8 *
h * k + (2 *
g - 14 *
h - 4) *
H - 8 *
h + 4 *
g - 6;
344 c = (6 *
h + 2 *
g - 2) * k2 + (4 *
h - 4 *
g + 6) * k + (2 *
h - 6) *
H + 4 *
h;
345 d = (2 *
h + 6) * k2 - 4 *
h * k;
382 double ts[ ] = { -1.1954, -1.5806, -1.8172,
383 -2.0032, -2.2526, -2.4204, -2.5283, -4.2649, -1.1786, -1.5394,
384 -1.7728, -1.9426, -2.1685, -2.3288, -2.4374, -3.8906, -1.166,
385 -1.5193, -1.7462, -1.9067, -2.126, -2.2818, -2.3926, -3.719,
386 -1.1407, -1.4659, -1.671, -1.8105, -2.0048, -2.1356, -2.2348,
387 -3.2905, -1.1253, -1.4371, -1.6314, -1.7619, -1.9396, -2.0637,
388 -2.1521, -3.0902, -1.0777, -1.3503, -1.5102, -1.6177, -1.761,
389 -1.8537, -1.9178, -2.5758, -1.0489, -1.2984, -1.4415, -1.5355,
390 -1.6625, -1.738, -1.7936, -2.3263, -0.9978, -1.2098, -1.3251,
391 -1.4007, -1.4977, -1.5555, -1.5941, -1.96, -0.9417, -1.1187,
392 -1.209, -1.2671, -1.3382, -1.379, -1.405, -1.6449, -0.8981, -1.0491,
393 -1.1235, -1.1692, -1.2249, -1.2552, -1.2755, -1.4395, -0.8598,
394 -0.9904, -1.0513, -1.0879, -1.1317, -1.155, -1.1694, -1.2816,
395 -0.7258, -0.7938, -0.8188, -0.8312, -0.8435, -0.8471, -0.8496,
396 -0.8416, -0.5966, -0.617, -0.6177, -0.6139, -0.6073, -0.5987,
397 -0.5941, -0.5244, -0.4572, -0.4383, -0.419, -0.4033, -0.3834,
398 -0.3676, -0.3587, -0.2533, -0.2966, -0.2428, -0.2078, -0.1844,
399 -0.1548, -0.1346, -0.1224, 0, -0.1009, -0.0169, 0.0304, 0.0596,
400 0.0933, 0.1156, 0.1294, 0.2533, 0.1571, 0.2635, 0.3169, 0.348,
401 0.3823, 0.4038, 0.4166, 0.5244, 0.5357, 0.6496, 0.6992, 0.7246,
402 0.7528, 0.7683, 0.7771, 0.8416, 1.2255, 1.2989, 1.3202, 1.3254,
403 1.3305, 1.3286, 1.3257, 1.2816, 1.5262, 1.5677, 1.5709, 1.5663,
404 1.5561, 1.5449, 1.5356, 1.4395, 1.9633, 1.943, 1.919, 1.8975,
405 1.8641, 1.8389, 1.8212, 1.6449, 2.7314, 2.5899, 2.5, 2.4451,
406 2.3664, 2.3155, 2.2823, 1.96, 3.7825, 3.4425, 3.2582, 3.1423,
407 3.0036, 2.9101, 2.8579, 2.3263, 4.1241, 3.716, 3.4984, 3.3651,
408 3.2003, 3.0928, 3.0311, 2.4324, 4.6044, 4.0847, 3.8348, 3.6714,
409 3.4721, 3.3453, 3.2777, 2.5758, 5.409, 4.7223, 4.4022, 4.1791,
410 3.9357, 3.7809, 3.6963, 2.807, 6.4954, 5.5823, 5.1456, 4.8657,
411 4.5506, 4.3275, 4.2228, 3.0902, 6.8279, 5.8282, 5.3658, 5.0749,
412 4.7318, 4.4923, 4.3642, 3.1747, 7.2755, 6.197, 5.6715, 5.3642,
413 4.9991, 4.7135, 4.5945, 3.2905, 8.1885, 6.8537, 6.2077, 5.8499,
414 5.4246, 5.1137, 4.9555, 3.4808, 9.3061, 7.6592, 6.85, 6.4806,
415 5.9919, 5.6122, 5.5136, 3.719, 9.6132, 7.9234, 7.1025, 6.6731,
416 6.1549, 5.8217, 5.7345, 3.7911, 10.0989, 8.2395, 7.4326, 6.9567,
417 6.3908, 6.011, 5.9566, 3.8906, 10.8825, 8.8994, 7.8934, 7.4501,
418 6.9009, 6.4538, 6.2705, 4.0556, 11.8537, 9.5482, 8.5568, 8.0283,
419 7.4418, 6.9524, 6.6195, 4.2649 };
426 double p[] = { .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,
427 .925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999 };
430 const int nbins = 35;
437 MATH_ERROR_MSG(
"InterpolatePValues",
"Interpolation not implemented for nsamples not equal to 2");
440 std::vector<double> ts2(nbins);
441 std::vector<double> lp(nbins);
442 for (
int i = 0; i < nbins; ++i)
444 ts2[i] = ts[offset+ i *
ns];
446 lp[i] =
std::log( p[i]/(1.-p[i] ) );
450 int i1 = std::distance(ts2.begin(), std::lower_bound(ts2.begin(), ts2.end(), tx ) ) - 1;
458 if (i2 >=
int(ts2.size()) ) {
464 assert(i1 < (
int) lp.size() && i2 < (
int) lp.size() );
467 double tx1 = ts2[i1];
468 double tx2 = ts2[i2];
472 double lp0 = (lp1-lp2) * (tx - tx2)/ ( tx1-tx2) + lp2;
475 double p0 =
exp(lp0)/(1. +
exp(lp0) );
487 }
else if (A2 < 2.) {
488 pvalue =
std::pow(A2, -0.5) *
std::exp(-1.2337141 / A2) * (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
490 pvalue =
std::exp(-1. *
std::exp(1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2));
524 for (i = 0; i <
n; i++) {
538 for (i = 0; i <
n; i++) {
546void adkTestStat(
double *adk,
const std::vector<std::vector<double> > & samples,
const std::vector<double> & zstar) {
552 int k = samples.size();
553 int l = zstar.size();
557 std::vector<int> fij (k*
l);
561 std::vector<int> lvec(
l);
579 std::vector<int>
ns(k);
581 for (i = 0; i < k; i++) {
582 ns[i] = samples[i].size();
590 for (j = 0; j <
l; j++) {
592 for (i = 0; i < k; i++) {
593 fij[i + j*k] =
getCount(zstar[j], &samples[i][0],
ns[i]);
594 lvec[j] += fij[i + j*k];
601 for (i = 0; i < k; i++) {
607 for (j = 0; j <
l; j++) {
609 maij = mij - (
double) fij[i + j*k] / 2.0;
610 bj =
getSum(&lvec[0], j + 1);
611 baj = bj - (
double) lvec[j] / 2.0;
614 tmp = (
double) nsum * mij - (
double)
ns[i] * bj;
615 innerSum = innerSum + (
double) lvec[j] * tmp * tmp /
616 (bj * ((
double) nsum - bj));
619 tmp = (
double) nsum * maij - (
double)
ns[i] * baj;
620 aInnerSum = aInnerSum + (
double) lvec[j] * tmp * tmp /
621 (baj * (nsum - baj) - nsum * (
double) lvec[j] / 4.0);
624 adk[0] = adk[0] + innerSum /
ns[i];
625 adk[1] = adk[1] + aInnerSum /
ns[i];
631 adk[0] = adk[0] / (
double) nsum;
632 adk[1] = (nsum - 1) * adk[1] / ((
double) nsum * (
double) nsum);
650 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
656 std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end());
657 z.erase(endUnique, z.end() );
658 std::vector<UInt_t>
h;
659 std::vector<Double_t>
H;
667 unsigned int nSamples =
fSamples.size();
670 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
674 std::bind(std::less<Double_t>(), std::placeholders::_1, *data)) +
n / 2.);
676 std::cout <<
"time for H";
679 std::vector<std::vector<Double_t> >
F(nSamples);
680 for (
UInt_t i = 0; i < nSamples; ++i) {
681 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
684 std::bind(std::less<Double_t>(), std::placeholders::_1, *data)) +
n / 2.);
687 std::cout <<
"time for F";
689 for (
UInt_t i = 0; i < nSamples; ++i) {
693 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
697 std::cout <<
"time for sum_resut";
699 std::cout <<
"sum_result " << sum_result << std::endl;
700 A2 += 1.0 /
fSamples[i].size() * sum_result;
704 std::cout <<
"A2 - old Bartolomeo code " << A2 << std::endl;
709 double adk[2] = {0,0};
732 for (
unsigned int k = 0; k <
ns.size(); ++k)
ns[k] =
fSamples[k].size();
759 if (data1.
NDim() != 1 && data2.
NDim() != 1) {
760 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Bin Data set must be one-dimensional ");
763 unsigned int n1 = data1.
Size();
764 unsigned int n2 = data2.
Size();
770 std::vector<double> xdata(n1+n2);
771 for (
unsigned int i = 0; i < n1; ++i) {
773 const double *
x = data1.
GetPoint(i, value);
777 for (
unsigned int i = 0; i < n2; ++i) {
779 const double *
x = data2.
GetPoint(i, value);
783 double nall = ntot1+ntot2;
785 std::vector<unsigned int> index(n1+n2);
798 double x = xdata[ index[j] ];
803 unsigned int i = index[k];
806 value = data1.
Value(i);
813 value = data2.
Value(i);
820 }
while ( k < n1+n2 && xdata[ index[k] ] ==
x );
826 double tmp1 = ( nall * sum1 - ntot1 * sumAll );
827 double tmp2 = ( nall * sum2 - ntot2 * sumAll );
828 adsum += t * (tmp1*tmp1/ntot1 + tmp2*tmp2/ntot2) / ( sumAll * (nall - sumAll) ) ;
833 double A2 = adsum / nall;
836 std::vector<unsigned int>
ns(2);
857 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
866 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
870 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
875 for (
Int_t i = 0; i <
n ; ++i) {
883 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
893 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
900 MATH_ERROR_MSG(
"KolmogorovSmirnov2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
905 std::vector<Double_t>
a(na);
906 std::vector<Double_t>
b(nb);
916 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
925 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
929 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
934 for (
UInt_t i = 0; i <
n; ++i) {
938 if (result > Dn) Dn = result;
948 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
#define MATH_ERROR_MSG(loc, str)
#define MATH_WARN_MSG(loc, str)
static const double x1[5]
double pow(double, double)
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
const double * GetPoint(unsigned int ipoint, double &value) const
retrieve at the same time a pointer to the coordinate data and the fit value More efficient than call...
double Value(unsigned int ipoint) const
return the value for the given fit point
unsigned int Size() const
return number of fit points
unsigned int NDim() const
return coordinate data dimension
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
std::unique_ptr< IGenFunction > fCDF
void SetSamples(std::vector< const Double_t * > samples, const std::vector< UInt_t > samplesSizes)
std::vector< Double_t > fCombinedSamples
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
void SetDistribution(EDistribution dist)
void Instantiate(const Double_t *sample, UInt_t sampleSize)
Double_t GaussianCDF(Double_t x) const
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
std::vector< std::vector< Double_t > > fSamples
Double_t PValueAD1Sample(Double_t A2) const
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Double_t ExponentialCDF(Double_t x) const
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
double exponential_cdf(double x, double lambda, double x0=0)
Cumulative distribution function of the exponential distribution (lower tail).
Namespace for new Math classes and functions.
double dist(Rotation3D const &r1, Rotation3D const &r2)
void adkTestStat(double *adk, const std::vector< std::vector< double > > &samples, const std::vector< double > &zstar)
int getCount(double z, const double *dat, int n)
int getSum(const int *x, int n)
IBaseFunctionOneDim IGenFunction
double inner_product(const LAVector &, const LAVector &)
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
static constexpr double ns
Double_t KolmogorovTest(Int_t na, const Double_t *a, Int_t nb, const Double_t *b, Option_t *option)
Statistical test whether two one-dimensional sets of points are compatible with coming from the same ...
Double_t Sqrt(Double_t x)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Double_t KolmogorovProb(Double_t z)
Calculates the Kolmogorov distribution function,.
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
static long int sum(long int i)