44 virtual ~CDFWrapper() {
if (fCDF)
delete fCDF; }
62 if (x <= fXmin)
return 0;
63 if (x >= fXmax)
return 1.0;
64 return (*fCDF)(
x)/fNorm;
68 return new CDFWrapper(*fCDF,fXmin,fXmax);
77 mutable IntegratorOneDim fIntegral;
81 virtual ~PDFIntegral() {
if (fPDF)
delete fPDF; }
90 fIntegral.SetFunction(*fPDF);
96 fNorm = fIntegral.Integral();
99 fNorm = fIntegral.IntegralLow(fXmax);
101 fNorm = fIntegral.IntegralUp(fXmin);
103 fNorm = fIntegral.Integral(fXmin, fXmax);
107 if (x <= fXmin)
return 0;
108 if (x >= fXmax)
return 1.0;
110 return fIntegral.IntegralLow(x)/fNorm;
112 return fIntegral.Integral(fXmin,x)/fNorm;
116 return new PDFIntegral(*fPDF, fXmin, fXmax);
122 MATH_ERROR_MSG(
"SetDistribution",
"Cannot set distribution type! Distribution type option must be ennabled.");
133 fTestSampleFromH0(
kFALSE) {
134 Bool_t badSampleArg = sample1 == 0 || sample1Size == 0;
136 std::string msg =
"'sample1";
137 msg += !sample1Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
141 badSampleArg = sample2 == 0 || sample2Size == 0;
143 std::string msg =
"'sample2";
144 msg += !sample2Size ?
"Size' cannot be zero" :
"' cannot be zero-length";
148 std::vector<const Double_t*> samples(2);
149 std::vector<UInt_t> samplesSizes(2);
150 samples[0] = sample1;
151 samples[1] = sample2;
152 samplesSizes[0] = sample1Size;
153 samplesSizes[1] = sample2Size;
162 fTestSampleFromH0(
kTRUE) {
163 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
165 std::string msg =
"'sample";
166 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
170 std::vector<const Double_t*> samples(1, sample);
171 std::vector<UInt_t> samplesSizes(1, sampleSize);
180 fCombinedSamples.assign(std::accumulate(samplesSizes.begin(), samplesSizes.end(), 0), 0.0);
181 UInt_t combinedSamplesSize = 0;
182 for (
UInt_t i = 0; i < samples.size(); ++i) {
183 fSamples[i].assign(samples[i], samples[i] + samplesSizes[i]);
185 for (
UInt_t j = 0; j < samplesSizes[i]; ++j) {
188 combinedSamplesSize += samplesSizes[i];
193 if (degenerateSamples) {
194 std::string msg =
"Degenerate sample";
195 msg += samplesSizes.size() > 1 ?
"s!" :
"!";
196 msg +=
" Sampling values all identical.";
198 assert(!degenerateSamples);
259 fCDF = std::auto_ptr<IGenFunction>(
cdf);
264 MATH_WARN_MSG(
"SetDistributionFunction",
"Distribution type is changed to user defined");
269 fCDF = std::auto_ptr<IGenFunction>(
new PDFIntegral(f, xmin, xmax) );
271 fCDF = std::auto_ptr<IGenFunction>(
new CDFWrapper(f, xmin, xmax) );
276 Bool_t badSampleArg = sample == 0 || sampleSize == 0;
278 std::string msg =
"'sample";
279 msg += !sampleSize ?
"Size' cannot be zero" :
"' cannot be zero-length";
287 fSamples = std::vector<std::vector<Double_t> >(1);
289 SetSamples(std::vector<const Double_t*>(1, sample), std::vector<UInt_t>(1, sampleSize));
311 Double_t sigmaN = 0.0,
h = 0.0,
H = 0.0,
g = 0.0,
a, b,
c, d, k = ns.size();
313 for (
UInt_t i = 0; i < ns.size(); ++i) {
320 std::vector<double> invI(N);
321 for (
UInt_t i = 1; i <= N - 1; ++i) {
325 for (
UInt_t i = 1; i <= N - 2; ++i) {
326 double tmp = invI[N-i];
327 for (
UInt_t j = i + 1; j <= N - 1; ++j) {
334 const double emc = 0.5772156649015328606065120900824024;
339 a = (4 *
g - 6) * k + (10 - 6 *
g) *
H - 4 *
g + 6;
340 b = (2 * g - 4) * k2 + 8 *
h * k + (2 * g - 14 *
h - 4) *
H - 8 *
h + 4 * g - 6;
341 c = (6 *
h + 2 * g - 2) * k2 + (4 *
h - 4 *g + 6) * k + (2 *
h - 6) *
H + 4 *
h;
342 d = (2 *
h + 6) * k2 - 4 *
h * k;
379 double ts[ ] = { -1.1954, -1.5806, -1.8172,
380 -2.0032, -2.2526, -2.4204, -2.5283, -4.2649, -1.1786, -1.5394,
381 -1.7728, -1.9426, -2.1685, -2.3288, -2.4374, -3.8906, -1.166,
382 -1.5193, -1.7462, -1.9067, -2.126, -2.2818, -2.3926, -3.719,
383 -1.1407, -1.4659, -1.671, -1.8105, -2.0048, -2.1356, -2.2348,
384 -3.2905, -1.1253, -1.4371, -1.6314, -1.7619, -1.9396, -2.0637,
385 -2.1521, -3.0902, -1.0777, -1.3503, -1.5102, -1.6177, -1.761,
386 -1.8537, -1.9178, -2.5758, -1.0489, -1.2984, -1.4415, -1.5355,
387 -1.6625, -1.738, -1.7936, -2.3263, -0.9978, -1.2098, -1.3251,
388 -1.4007, -1.4977, -1.5555, -1.5941, -1.96, -0.9417, -1.1187,
389 -1.209, -1.2671, -1.3382, -1.379, -1.405, -1.6449, -0.8981, -1.0491,
390 -1.1235, -1.1692, -1.2249, -1.2552, -1.2755, -1.4395, -0.8598,
391 -0.9904, -1.0513, -1.0879, -1.1317, -1.155, -1.1694, -1.2816,
392 -0.7258, -0.7938, -0.8188, -0.8312, -0.8435, -0.8471, -0.8496,
393 -0.8416, -0.5966, -0.617, -0.6177, -0.6139, -0.6073, -0.5987,
394 -0.5941, -0.5244, -0.4572, -0.4383, -0.419, -0.4033, -0.3834,
395 -0.3676, -0.3587, -0.2533, -0.2966, -0.2428, -0.2078, -0.1844,
396 -0.1548, -0.1346, -0.1224, 0, -0.1009, -0.0169, 0.0304, 0.0596,
397 0.0933, 0.1156, 0.1294, 0.2533, 0.1571, 0.2635, 0.3169, 0.348,
398 0.3823, 0.4038, 0.4166, 0.5244, 0.5357, 0.6496, 0.6992, 0.7246,
399 0.7528, 0.7683, 0.7771, 0.8416, 1.2255, 1.2989, 1.3202, 1.3254,
400 1.3305, 1.3286, 1.3257, 1.2816, 1.5262, 1.5677, 1.5709, 1.5663,
401 1.5561, 1.5449, 1.5356, 1.4395, 1.9633, 1.943, 1.919, 1.8975,
402 1.8641, 1.8389, 1.8212, 1.6449, 2.7314, 2.5899, 2.5, 2.4451,
403 2.3664, 2.3155, 2.2823, 1.96, 3.7825, 3.4425, 3.2582, 3.1423,
404 3.0036, 2.9101, 2.8579, 2.3263, 4.1241, 3.716, 3.4984, 3.3651,
405 3.2003, 3.0928, 3.0311, 2.4324, 4.6044, 4.0847, 3.8348, 3.6714,
406 3.4721, 3.3453, 3.2777, 2.5758, 5.409, 4.7223, 4.4022, 4.1791,
407 3.9357, 3.7809, 3.6963, 2.807, 6.4954, 5.5823, 5.1456, 4.8657,
408 4.5506, 4.3275, 4.2228, 3.0902, 6.8279, 5.8282, 5.3658, 5.0749,
409 4.7318, 4.4923, 4.3642, 3.1747, 7.2755, 6.197, 5.6715, 5.3642,
410 4.9991, 4.7135, 4.5945, 3.2905, 8.1885, 6.8537, 6.2077, 5.8499,
411 5.4246, 5.1137, 4.9555, 3.4808, 9.3061, 7.6592, 6.85, 6.4806,
412 5.9919, 5.6122, 5.5136, 3.719, 9.6132, 7.9234, 7.1025, 6.6731,
413 6.1549, 5.8217, 5.7345, 3.7911, 10.0989, 8.2395, 7.4326, 6.9567,
414 6.3908, 6.011, 5.9566, 3.8906, 10.8825, 8.8994, 7.8934, 7.4501,
415 6.9009, 6.4538, 6.2705, 4.0556, 11.8537, 9.5482, 8.5568, 8.0283,
416 7.4418, 6.9524, 6.6195, 4.2649 };
423 double p[] = { .00001,.00005,.0001,.0005,.001,.005,.01,.025,.05,.075,.1,.2,.3,.4,.5,.6,.7,.8,.9,
424 .925,.95,.975,.99,.9925,.995,.9975,.999,.99925,.9995,.99975,.9999,.999925,.99995,.999975,.99999 };
427 const int nbins = 35;
434 MATH_ERROR_MSG(
"InterpolatePValues",
"Interpolation not implemented for nsamples not equal to 2");
437 std::vector<double> ts2(nbins);
438 std::vector<double> lp(nbins);
439 for (
int i = 0; i <
nbins; ++i)
441 ts2[i] = ts[offset+ i * ns];
443 lp[i] =
std::log( p[i]/(1.-p[i] ) );
447 int i1 =
std::distance(ts2.begin(), std::lower_bound(ts2.begin(), ts2.end(), tx ) ) - 1;
455 if (i2 >=
int(ts2.size()) ) {
461 assert(i1 < (
int) lp.size() && i2 < (int) lp.size() );
464 double tx1 = ts2[i1];
465 double tx2 = ts2[i2];
469 double lp0 = (lp1-lp2) * (tx - tx2)/ ( tx1-tx2) + lp2;
472 double p0 =
exp(lp0)/(1. +
exp(lp0) );
484 }
else if (A2 < 2.) {
485 pvalue =
std::pow(A2, -0.5) *
std::exp(-1.2337141 / A2) * (2.00012 + (0.247105 - (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 * A2) * A2) * A2) * A2) * A2);
487 pvalue =
std::exp(-1. *
std::exp(1.0776 - (2.30695 - (0.43424 - (.082433 - (0.008056 - 0.0003146 * A2) * A2) * A2) * A2) * A2));
521 for (i = 0; i <
n; i++) {
535 for (i = 0; i <
n; i++) {
543 void adkTestStat(
double *adk,
const std::vector<std::vector<double> > & samples,
const std::vector<double> & zstar) {
549 int k = samples.size();
550 int l = zstar.size();
554 std::vector<int> fij (k*l);
558 std::vector<int> lvec(l);
576 std::vector<int> ns(k);
578 for (i = 0; i < k; i++) {
579 ns[i] = samples[i].size();
587 for (j = 0; j <
l; j++) {
589 for (i = 0; i < k; i++) {
590 fij[i + j*k] =
getCount(zstar[j], &samples[i][0], ns[i]);
591 lvec[j] += fij[i + j*k];
598 for (i = 0; i < k; i++) {
604 for (j = 0; j <
l; j++) {
606 maij = mij - (
double) fij[i + j*k] / 2.0;
607 bj =
getSum(&lvec[0], j + 1);
608 baj = bj - (
double) lvec[j] / 2.0;
611 tmp = (
double) nsum * mij - (
double) ns[i] * bj;
612 innerSum = innerSum + (
double) lvec[j] * tmp * tmp /
613 (bj * ((
double) nsum - bj));
616 tmp = (
double) nsum * maij - (
double) ns[i] * baj;
617 aInnerSum = aInnerSum + (
double) lvec[j] * tmp * tmp /
618 (baj * (nsum - baj) - nsum * (
double) lvec[j] / 4.0);
621 adk[0] = adk[0] + innerSum / ns[i];
622 adk[1] = adk[1] + aInnerSum / ns[i];
628 adk[0] = adk[0] / (
double) nsum;
629 adk[1] = (nsum - 1) * adk[1] / ((
double) nsum * (
double) nsum);
647 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
653 std::vector<Double_t>::iterator endUnique = std::unique(z.begin(), z.end());
654 z.erase(endUnique, z.end() );
655 std::vector<UInt_t>
h;
656 std::vector<Double_t>
H;
664 unsigned int nSamples =
fSamples.size();
667 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
672 std::cout <<
"time for H";
675 std::vector<std::vector<Double_t> >
F(nSamples);
676 for (
UInt_t i = 0; i < nSamples; ++i) {
677 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
679 F[i].push_back(std::count_if(
fSamples[i].begin(),
fSamples[i].end(), bind2nd(std::less<Double_t>(), *data)) + n / 2.);
682 std::cout <<
"time for F";
684 for (
UInt_t i = 0; i < nSamples; ++i) {
688 for (std::vector<Double_t>::iterator data = z.begin(); data != endUnique; ++data) {
689 sum_result += h[j] *
TMath::Power(N * F[i][j]-
fSamples[i].size() * H[j], 2) / (H[j] * (N - H[j]) - N * h[j] / 4.0);
692 std::cout <<
"time for sum_resut";
694 std::cout <<
"sum_result " << sum_result << std::endl;
695 A2 += 1.0 /
fSamples[i].size() * sum_result;
699 std::cout <<
"A2 - old Bartolomeo code " << A2 << std::endl;
704 double adk[2] = {0,0};
726 std::vector<UInt_t> ns(
fSamples.size());
727 for (
unsigned int k = 0; k < ns.size(); ++k) ns[k] =
fSamples[k].size();
754 if (data1.
NDim() != 1 && data2.
NDim() != 1) {
755 MATH_ERROR_MSG(
"AndersonDarling2SamplesTest",
"Bin Data set must be one-dimensional ");
758 unsigned int n1 = data1.
Size();
759 unsigned int n2 = data2.
Size();
765 std::vector<double> xdata(n1+n2);
766 for (
unsigned int i = 0; i < n1; ++i) {
768 const double * x = data1.
GetPoint(i, value);
772 for (
unsigned int i = 0; i < n2; ++i) {
774 const double * x = data2.
GetPoint(i, value);
778 double nall = ntot1+ntot2;
780 std::vector<unsigned int> index(n1+n2);
793 double x = xdata[ index[j] ];
798 unsigned int i = index[k];
801 value = data1.
Value(i);
808 value = data2.
Value(i);
815 }
while ( k < n1+n2 && xdata[ index[k] ] == x );
821 double tmp1 = ( nall * sum1 - ntot1 * sumAll );
822 double tmp2 = ( nall * sum2 - ntot2 * sumAll );
823 adsum += t * (tmp1*tmp1/ntot1 + tmp2*tmp2/ntot2) / ( sumAll * (nall - sumAll) ) ;
828 double A2 = adsum / nall;
831 std::vector<unsigned int> ns(2);
852 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
861 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
865 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
870 for (
Int_t i = 0; i <
n ; ++i) {
878 MATH_ERROR_MSG(
"AndersonDarlingTest",
"Cannot compute p-value: data below or above the distribution's thresholds. Check sample consistency.");
888 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
895 MATH_ERROR_MSG(
"KolmogorovSmirnov2SamplesTest",
"Only 1-sample tests can be issued with a 1-sample constructed GoFTest object!");
900 std::vector<Double_t>
a(na);
901 std::vector<Double_t> b(nb);
911 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
920 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Only 2-sample tests can be issued with a 2-sample constructed GoFTest object!");
924 MATH_ERROR_MSG(
"KolmogorovSmirnovTest",
"Distribution type is undefined! Please use SetDistribution(GoFTest::EDistribution).");
929 for (
UInt_t i = 0; i <
n; ++i) {
933 if (result > Dn) Dn =
result;
943 return (strncmp(option,
"p", 1) == 0 || strncmp(option,
"t", 1) != 0) ? pvalue : testStat;
void SetDistribution(EDistribution dist)
Double_t PValueAD1Sample(Double_t A2) const
double dist(Rotation3D const &r1, Rotation3D const &r2)
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
int getCount(double z, const double *dat, int n)
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Namespace for new ROOT classes and functions.
Double_t KolmogorovProb(Double_t z)
Calculates the Kolmogorov distribution function, Begin_Html.
std::vector< Double_t > fCombinedSamples
Double_t distance(const TPoint2 &p1, const TPoint2 &p2)
double inner_product(const LAVector &, const LAVector &)
void KolmogorovSmirnovTest(Double_t &pvalue, Double_t &testStat) const
#define MATH_WARN_MSG(loc, str)
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
void operator()(ETestType test, Double_t &pvalue, Double_t &testStat) const
unsigned int Size() const
return number of fit points
double cdf(double *x, double *p)
double normal_cdf(double x, double sigma=1, double x0=0)
Cumulative distribution function of the normal (Gaussian) distribution (lower tail).
static Double_t PValueADKSamples(UInt_t nsamples, Double_t A2)
double pow(double, double)
void Sort(Index n, const Element *a, Index *index, Bool_t down=kTRUE)
static Double_t GetSigmaN(const std::vector< UInt_t > &ns, UInt_t N)
#define MATH_ERROR_MSG(loc, str)
void AndersonDarling2SamplesTest(Double_t &pvalue, Double_t &testStat) const
int getSum(const int *x, int n)
IBaseFunctionOneDim IGenFunction
Double_t GaussianCDF(Double_t x) const
Double_t KolmogorovTest(Int_t na, const Double_t *a, Int_t nb, const Double_t *b, Option_t *option)
Statistical test whether two one-dimensional sets of points are compatible with coming from the same ...
Class describing the binned data sets : vectors of x coordinates, y values and optionally error on y ...
std::vector< std::vector< Double_t > > fSamples
static const double x1[5]
void Instantiate(const Double_t *sample, UInt_t sampleSize)
double Value(unsigned int ipoint) const
return the value for the given fit point
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Namespace for new Math classes and functions.
void SetSamples(std::vector< const Double_t * > samples, const std::vector< UInt_t > samplesSizes)
const double * GetPoint(unsigned int ipoint, double &value) const
retrieve at the same time a pointer to the coordinate data and the fit value More efficient than call...
Double_t Sqrt(Double_t x)
void SetDistributionFunction(const IGenFunction &cdf, Bool_t isPDF, Double_t xmin, Double_t xmax)
void AndersonDarlingTest(Double_t &pvalue, Double_t &testStat) const
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
void KolmogorovSmirnov2SamplesTest(Double_t &pvalue, Double_t &testStat) const
void adkTestStat(double *adk, const std::vector< std::vector< double > > &samples, const std::vector< double > &zstar)
unsigned int NDim() const
return coordinate data dimension
double exponential_cdf(double x, double lambda, double x0=0)
Cumulative distribution function of the exponential distribution (lower tail).
Double_t ExponentialCDF(Double_t x) const