53 enum EIntegralResult{
kNorm, kMu, kSigma2, kUnitIntegration};
54 KernelIntegrand(
const TKDE* kde, EIntegralResult intRes);
58 EIntegralResult fIntegralResult;
64 fKernelFunction(nullptr),
69 fApproximateBias(nullptr),
71 fUseMirroring(false), fMirrorLeft(false), fMirrorRight(false), fAsymLeft(false), fAsymRight(false),
72 fUseBins(false), fNewData(false), fUseMinMaxFromData(false),
73 fNBins(0), fNEvents(0), fSumOfCounts(0), fUseBinsNEvents(0),
74 fMean(0.),fSigma(0.), fSigmaRob(0.), fXMin(0.), fXMax(0.),
75 fRho(0.), fAdaptiveBandwidthFactor(0.), fWeightSize(0)
104 fNBins = events < 10000 ? 1000 : std::min(10000,
int(events / 100)*10);
132 std::string options = opt.
Data();
134 std::vector<std::string> voption(numOpt,
"");
135 for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
136 size_t pos = options.find_last_of(
';');
137 if (pos == std::string::npos) {
141 *it = options.substr(pos + 1);
142 options = options.substr(0, pos);
144 for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end(); ++it) {
145 size_t pos = (*it).find(
':');
146 if (pos != std::string::npos) {
147 GetOptions((*it).substr(0, pos) , (*it).substr(pos + 1));
158 std::vector<std::string> voption(numOpt,
"");
159 for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
160 size_t pos = options.find_last_of(
';');
161 if (pos == std::string::npos) {
165 *it = options.substr(pos + 1);
166 options = options.substr(0, pos);
170 for (std::vector<std::string>::iterator it = voption.begin(); it != voption.end() && !options.empty(); ++it) {
171 size_t pos = (*it).find(
':');
172 if (pos == std::string::npos)
break;
173 TString optionType = (*it).substr(0, pos);
174 TString optionInstance = (*it).substr(pos + 1);
178 foundPlotOPt =
kTRUE;
179 if (optionInstance.
Contains(
"estimate") || optionInstance.
Contains(
"errors") || optionInstance.
Contains(
"confidenceinterval"))
180 plotOpt = optionInstance;
182 this->
Warning(
"SetDrawOptions",
"Unknown plotting option %s: setting to KDE estimate plot.",optionInstance.
Data());
183 this->
Info(
"SetDrawOptions",
"Possible plotting options are: Estimate,Errors,ConfidenceInterval");
185 }
else if (optionType.
Contains(
"drawoptions")) {
186 foundDrawOPt =
kTRUE;
187 drawOpt = optionInstance;
191 this->
Warning(
"SetDrawOptions",
"No plotting option: setting to KDE estimate plot.");
192 plotOpt =
"estimate";
195 this->
Warning(
"SetDrawOptions",
"No drawing options: setting to default ones.");
202 if (optionType.compare(
"kerneltype") == 0) {
204 if (option.compare(
"gaussian") == 0) {
206 }
else if (option.compare(
"epanechnikov") == 0) {
208 }
else if (option.compare(
"biweight") == 0) {
210 }
else if (option.compare(
"cosinearch") == 0) {
212 }
else if (option.compare(
"userdefined") == 0) {
215 this->
Warning(
"GetOptions",
"Unknown kernel type option %s: setting to Gaussian",option.c_str());
216 this->
Info(
"GetOptions",
"Possible kernel type options are: Gaussian, Epanechnikov, Biweight, Cosinearch, Userdefined");
219 }
else if (optionType.compare(
"iteration") == 0) {
221 if (option.compare(
"adaptive") == 0) {
223 }
else if (option.compare(
"fixed") == 0) {
226 this->
Warning(
"GetOptions",
"Unknown iteration option %s: setting to Adaptive",option.c_str());
227 this->
Info(
"GetOptions",
"Possible iteration type options are: Adaptive, Fixed");
230 }
else if (optionType.compare(
"mirror") == 0) {
232 if (option.compare(
"nomirror") == 0) {
234 }
else if (option.compare(
"mirrorleft") == 0) {
236 }
else if (option.compare(
"mirrorright") == 0) {
238 }
else if (option.compare(
"mirrorboth") == 0) {
240 }
else if (option.compare(
"mirrorasymleft") == 0) {
242 }
else if (option.compare(
"mirrorrightasymleft") == 0) {
244 }
else if (option.compare(
"mirrorasymright") == 0) {
246 }
else if (option.compare(
"mirrorleftasymright") == 0) {
248 }
else if (option.compare(
"mirrorasymboth") == 0) {
251 this->
Warning(
"GetOptions",
"Unknown mirror option %s: setting to NoMirror",option.c_str());
252 this->
Info(
"GetOptions",
"Possible mirror type options are: NoMirror, MirrorLeft, MirrorRight, MirrorAsymLeft,"
253 "MirrorAsymRight, MirrorRightAsymLeft, MirrorLeftAsymRight, MirrorAsymBoth");
256 }
else if (optionType.compare(
"binning") == 0) {
258 if (option.compare(
"unbinned") == 0) {
260 }
else if (option.compare(
"relaxedbinning") == 0) {
262 }
else if (option.compare(
"forcedbinning") == 0) {
265 this->
Warning(
"GetOptions",
"Unknown binning option %s: setting to RelaxedBinning", option.c_str());
266 this->
Info(
"GetOptions",
"Possible binning type options are: Unbinned, ForcedBinning, RelaxedBinning");
291 Error(
"CheckOptions",
"Illegal user kernel type input! Use template constructor for user defined kernel.");
294 Warning(
"CheckOptions",
"Illegal user iteration type input - use default value !");
298 Warning(
"CheckOptions",
"Illegal user mirroring type input - use default value !");
302 Warning(
"CheckOptions",
"Illegal user binning type input - use default value !");
306 Warning(
"CheckOptions",
"Tuning factor rho cannot be non-positive - use default value !");
351 Error(
"SetNBins",
"Number of bins must be greater than zero.");
360 Warning(
"SetNBins",
"Bin type using SetBinning must be set for using a binned evaluation");
362 Warning(
"SetNBins",
"Bin type using SetBinning or with SetUseBinsNEvents must be set for using a binned evaluation");
385 Error(
"SetRange",
"Minimum range cannot be bigger or equal than the maximum range! Present range values remain the same.");
460 this->
Warning(
"SetData",
"Default number of bins is greater or equal to number of events. Use SetNBins(UInt_t) to set the appropriate number of bins");
490 Error(
"ReInit",
"TKDE does not contain any data !");
506 Error(
"InitFromNewData",
"Re-felling is not supported with binning");
551 int iLast = i0 + 2 *
fNBins - 1;
560 std::vector<Double_t> originalEvents =
fEvents;
565 std::bind(std::minus<Double_t>(), 2 *
fXMin, std::placeholders::_1));
570 std::bind(std::minus<Double_t>(), 2 *
fXMax, std::placeholders::_1));
608 fKernel = std::make_unique<TKernel>(weight,
this);
611 fKernel->ComputeAdaptiveWeights();
616 "Using a fix kernel - bandwidth = %f - using n = %d, rho = %f , sigmaRob = %f , mean = %f , sigma = %f "
617 ", canonicalBandwidth= %f",
621 "Using an adaptive kernel - weight = %f - using n = %d, rho = %f , sigmaRob = %f , mean = %f , sigma = %f "
622 ", canonicalBandwidth= %f",
631 Fatal(
"SetKernelFunction",
"Kernel function pointer is not null");
664 Error(
"SetKernelFunction",
"User kernel function is not defined !");
718 this->
Warning(
"Fill",
"Cannot fill data with data binned option. Data input ignored.");
721 fData.push_back(data);
729 this->
Warning(
"Fill",
"Cannot fill data with data binned option. Data input ignored.");
732 fData.push_back(data);
746 (
const_cast<TKDE*
>(
this))->ReInit();
755 if (
fNewData) (
const_cast<TKDE*
>(
this))->InitFromNewData();
761 if (
fNewData) (
const_cast<TKDE*
>(
this))->InitFromNewData();
768 return std::sqrt(result);
774fNWeights(kde->fData.size()),
780 unsigned int n = fKDE->fData.size();
781 Double_t minWeight = fWeights[0] * 0.05;
783 std::vector<Double_t> weights(
n, fWeights[0]);
784 bool useDataWeights = (fKDE->fBinCount.size() ==
n);
786 for (
unsigned int i = 0; i <
n; ++i) {
788 if (useDataWeights && fKDE->fBinCount[i] <= 0) {
789 weights[i] = fWeights[0];
792 f = (*fKDE->fKernel)(fKDE->fData[i]);
795 fKDE->Warning(
"ComputeAdativeWeights",
"function value is zero or negative for x = %f w = %f - set their bandwidth to zero",
796 fKDE->fData[i],(useDataWeights) ? fKDE->fBinCount[i] : 1.);
802 weights[i] = std::max(weights[i] /= std::sqrt(
f), minWeight);
803 fKDE->fAdaptiveBandwidthFactor += std::log(
f);
806 Double_t kAPPROX_GEO_MEAN = 0.241970724519143365;
808 fKDE->fAdaptiveBandwidthFactor = fKDE->fUseMirroring ? kAPPROX_GEO_MEAN / fKDE->fSigmaRob : std::sqrt(std::exp(fKDE->fAdaptiveBandwidthFactor / fKDE->fData.size()));
811 transform(weights.begin(), weights.end(), fWeights.begin(),
812 std::bind(std::multiplies<Double_t>(), std::placeholders::_1, fKDE->fAdaptiveBandwidthFactor));
818 return fWeights[fKDE->Index(
x)];
838 assert(
fEvents.size() == nevents);
842 for (
UInt_t i = 0; i < nevents; ++i) {
852 for (
UInt_t i = 0; i < nevents; ++i) {
909 else if (plotOpt.
Contains(
"confidenceinterval") ||
915 const char * s = strstr(plotOpt.
Data(),
"interval@");
917 if (s != 0) sscanf(s,
"interval@%lf",&level);
918 if((level <= 0) || (level >= 1)) {
919 Warning(
"Draw",
"given confidence level %.3lf is invalid - use default 0.95",level);
949 for (
UInt_t i = 0; i <=
n; ++i) {
951 y[i] = (*this)(
x[i]);
956 ge->
SetName(
"kde_graph_error");
971 upper->
Draw((
"same" + drawOpt).Data());
974 lower->
Draw((
"same" + drawOpt).Data());
985 this->
Warning(
"GetFixedWeight()",
"Fixed iteration option not enabled. Returning %f.", result);
987 result =
fKernel->GetFixedWeight();
995 this->
Warning(
"GetFixedWeight()",
"Adaptive iteration option not enabled. Returning a NULL pointer<");
998 if (
fNewData) (
const_cast<TKDE*
>(
this))->InitFromNewData();
999 return &(
fKernel->GetAdaptiveWeights()).front();
1015 UInt_t n = fKDE->fData.size();
1017 Bool_t useCount = (fKDE->fBinCount.size() ==
n);
1020 Double_t nSum = fKDE->fSumOfCounts;
1023 Bool_t hasAdaptiveWeights = (fWeights.size() ==
n);
1024 Double_t invWeight = (!hasAdaptiveWeights) ? 1. / fWeights[0] : 0;
1025 for (
UInt_t i = 0; i <
n; ++i) {
1026 Double_t binCount = (useCount) ? fKDE->fBinCount[i] : 1.0;
1030 if (hasAdaptiveWeights) {
1032 if (fWeights[i] == 0)
continue;
1033 invWeight = 1. / fWeights[i];
1035 result += binCount * invWeight * (*fKDE->fKernelFunction)((
x - fKDE->fData[i]) * invWeight );
1036 if (fKDE->fAsymLeft) {
1037 result += binCount * invWeight * (*fKDE->fKernelFunction)((
x - (2. * fKDE->fXMin - fKDE->fData[i])) * invWeight);
1039 if (fKDE->fAsymRight) {
1040 result += binCount * invWeight * (*fKDE->fKernelFunction)((
x - (2. * fKDE->fXMax - fKDE->fData[i])) * invWeight);
1045 fKDE->Warning(
"operator()",
"Result is NaN for x %f \n",
x);
1047 return result / nSum;
1056 if (bin == (
Int_t)
fData.size())
return --bin;
1059 }
else if (bin <= 0) {
1099 return std::sqrt(result);
1106 valid = valid && unity == 1.;
1108 Error(
"CheckKernelValidity",
"Kernel's integral is %f",unity);
1111 valid = valid && mu == 0.;
1113 Error(
"CheckKernelValidity",
"Kernel's mu is %f" ,mu);
1116 valid = valid && sigma2 > 0 && sigma2 != std::numeric_limits<Double_t>::infinity();
1118 Error(
"CheckKernelValidity",
"Kernel's sigma2 is %f",sigma2);
1121 Error(
"CheckKernelValidity",
"Validation conditions: the kernel's integral must be 1, the kernel's mu must be zero and the kernel's sigma2 must be finite positive to be a suitable kernel.");
1156 KernelIntegrand kernel(
this, TKDE::KernelIntegrand::kUnitIntegration);
1176 Double_t quantiles[2] = {0.0, 0.0};
1179 Double_t midspread = quantiles[1] - quantiles[0];
1196 Double_t quantiles[2] = {0.0, 0.0};
1199 Double_t lowerquartile = quantiles[0];
1200 Double_t upperquartile = quantiles[1];
1201 return upperquartile - lowerquartile;
1214TKDE::KernelIntegrand::KernelIntegrand(
const TKDE* kde, EIntegralResult intRes) : fKDE(kde), fIntegralResult(intRes) {
1220 if (fIntegralResult == kNorm) {
1221 return std::pow((*fKDE->fKernelFunction)(
x), 2);
1222 }
else if (fIntegralResult == kMu) {
1223 return x * (*fKDE->fKernelFunction)(
x);
1224 }
else if (fIntegralResult == kSigma2) {
1225 return std::pow(
x, 2) * (*fKDE->fKernelFunction)(
x);
1226 }
else if (fIntegralResult == kUnitIntegration) {
1227 return (*fKDE->fKernelFunction)(
x);
1237 if (xMin >= xMax) { xMin =
fXMin; xMax =
fXMax; }
1240 TF1 * pdf =
new TF1(
name.Data(),
this, xMin, xMax, 0);
1242 if (npx > 0) pdf->
SetNpx(npx);
1251 if (xMin >= xMax) { xMin =
fXMin; xMax =
fXMax; }
1254 if (npx > 0) upperPDF->
SetNpx(npx);
1264 if (xMin >= xMax) { xMin =
fXMin; xMax =
fXMax; }
1267 if (npx > 0) lowerPDF->
SetNpx(npx);
1276 if (xMin >= xMax) { xMin =
fXMin; xMax =
fXMax; }
1278 if (npx > 0) approximateBias->
SetNpx(npx);
1280 delete approximateBias;
static const double x2[5]
static const double x1[5]
TRObject operator()(const T1 &t1) const
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
User Class for performing numerical integration of a function in one dimension.
void SetFunction(Function &f)
method to set the a generic integration function
double Integral(Function &f, double a, double b)
evaluate the Integral of a function f over the defined interval (a,b)
User class for calculating the derivatives of a function.
double Derivative2(double x)
Returns the second derivative of the function at point x, computed by Richardson's extrapolation meth...
void SetFunction(const IGenFunction &f)
Set function for derivative calculation (copy the function if option has been enabled in the construc...
Template class to wrap any C++ callable object which takes one argument i.e.
Template class to wrap any member function of a class taking a double and returning a double in a 1D ...
virtual void SetLineColor(Color_t lcolor)
Set the line color.
virtual void SetTitle(const char *title="")
Set function title if title has the form "fffffff;xxxx;yyyy", it is assumed that the function title i...
virtual void SetNpx(Int_t npx=100)
Set the number of points used to draw the function.
virtual void Draw(Option_t *option="")
Draw this function with its current attributes.
static Bool_t DefaultAddToGlobalList(Bool_t on=kTRUE)
Static method to add/avoid to add automatically functions to the global list (gROOT->GetListOfFunctio...
TObject * Clone(const char *newname=0) const
Make a complete copy of the underlying object.
virtual void SetParameter(Int_t param, Double_t value)
A TGraphErrors is a TGraph with error bars.
virtual void SetName(const char *name="")
Set graph name.
virtual void SetTitle(const char *title="")
Change (i.e.
virtual void Draw(Option_t *chopt="")
Draw this graph with its current attributes.
1-D histogram with a double per channel (see TH1 documentation)}
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum=0)
Compute Quantiles for this histogram Quantile x_q of a probability distribution Function F is defined...
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Double_t GetRMS(Int_t axis=1) const
virtual void FillN(Int_t ntimes, const Double_t *x, const Double_t *w, Int_t stride=1)
Fill this histogram with an array x and weights w.
virtual Double_t GetSumOfWeights() const
Return the sum of weights excluding under/overflows.
void ComputeAdaptiveWeights()
Double_t GetFixedWeight() const
TKernel(Double_t weight, TKDE *kde)
Double_t GetWeight(Double_t x) const
Double_t operator()(Double_t x) const
const std::vector< Double_t > & GetAdaptiveWeights() const
Kernel Density Estimation class.
TF1 * GetPDFUpperConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
TF1 * GetKDEApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
void SetData(const Double_t *data, const Double_t *weights)
TF1 * fLowerPDF
Output Kernel Density Estimation upper confidence interval PDF function.
std::vector< Double_t > fKernelSigmas2
Double_t ComputeKernelL2Norm() const
TF1 * GetPDFLowerConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
void SetKernelType(EKernelType kern)
std::vector< Double_t > fCanonicalBandwidths
void SetKernelFunction(KernelFunction_Ptr kernfunc=0)
void ComputeDataStats()
Internal funciton to compute statistics (mean,stddev) using always all the provided data (i....
Double_t UpperConfidenceInterval(const Double_t *x, const Double_t *p) const
Double_t ApproximateBias(const Double_t *x, const Double_t *) const
Double_t ComputeMidspread()
void DrawConfidenceInterval(TString &drawOpt, double cl=0.95)
// Draws the KDE and its confidence interval
void SetMirroredEvents()
Intgernal function to mirror the data.
void SetUserCanonicalBandwidth()
void CheckKernelValidity()
const Double_t * GetAdaptiveWeights() const
Double_t fAdaptiveBandwidthFactor
Double_t LowerConfidenceInterval(const Double_t *x, const Double_t *p) const
std::vector< Double_t > fBinCount
EIteration
Iteration types. They can be set using SetIteration()
Double_t GetRAMISE() const
void SetIteration(EIteration iter)
Double_t ComputeKernelIntegral() const
Double_t CosineArchKernel(Double_t x) const
Double_t operator()(Double_t x) const
void SetUserKernelSigma2()
Double_t GetBias(Double_t x) const
std::vector< Double_t > fData
TGraphErrors * GetGraphWithErrors(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
return a TGraphErrors with the KDE values and errors The return object is managed by the user
void SetUseBinsNEvents(UInt_t nEvents)
std::vector< Double_t > fEvents
Double_t GetError(Double_t x) const
TF1 * GetKDEFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
void SetBinning(EBinning)
void GetOptions(std::string optionType, std::string option)
std::vector< Bool_t > fSettedOptions
Double_t GaussianKernel(Double_t x) const
void SetRange(Double_t xMin, Double_t xMax)
virtual void Draw(const Option_t *option="")
Draws either the KDE functions or its errors.
Double_t ComputeKernelSigma2() const
void SetOptions(const Option_t *option, Double_t rho)
Double_t GetFixedWeight() const
TF1 * GetFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
TF1 * GetUpperFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t *data, const Double_t *weight, Double_t xMin, Double_t xMax, const Option_t *option, Double_t rho)
void SetDrawOptions(const Option_t *option, TString &plotOpt, TString &drawOpt)
EMirror
Data "mirroring" option to address the probability "spill out" boundary effect They can be set using ...
TGraphErrors * fGraph
Output Kernel Density Estimation approximate bias.
void SetCanonicalBandwidths()
TKDE()
default constructor used only by I/O
void SetBinCentreData(Double_t xmin, Double_t xmax)
void SetTuneFactor(Double_t rho)
UInt_t Index(Double_t x) const
compute the bin index given a data point x
TF1 * fUpperPDF
Output Kernel Density Estimation PDF function.
Double_t ComputeKernelMu() const
EKernelType
Types of Kernel functions They can be set using the function SetKernelType()
void DrawErrors(TString &drawOpt)
Draws a TGraphErrors wih KDE values and errors.
void SetNBins(UInt_t nbins)
void CheckOptions(Bool_t isUserDefinedKernel=kFALSE)
Double_t EpanechnikovKernel(Double_t x) const
Double_t BiweightKernel(Double_t x) const
std::unique_ptr< TKernel > fKernel
! internal kernel class. Transient because it is recreated after reading from a file
Bool_t fUseMinMaxFromData
Double_t GetSigma() const
EKernelType fKernelType
Graph with the errors.
TF1 * fApproximateBias
Output Kernel Density Estimation lower confidence interval PDF function.
void SetSigma(Double_t R)
std::vector< Double_t > fEventWeights
TF1 * GetApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
TF1 * GetLowerFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
KernelFunction_Ptr fKernelFunction
! pointer to kernel function
friend struct KernelIntegrand
EBinning
Data binning option.
virtual const char * GetTitle() const
Returns title of object.
virtual const char * GetName() const
Returns name of object.
virtual void Warning(const char *method, const char *msgfmt,...) const
Issue warning message.
virtual void Error(const char *method, const char *msgfmt,...) const
Issue error message.
virtual void Fatal(const char *method, const char *msgfmt,...) const
Issue fatal error message.
virtual void Info(const char *method, const char *msgfmt,...) const
Issue info message.
void ToLower()
Change string to lower-case.
void Clear()
Clear string without changing its capacity.
const char * Data() const
TString & ReplaceAll(const TString &s1, const TString &s2)
void Form(const char *fmt,...)
Formats a string using a printf style format descriptor.
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
double normal_quantile(double z, double sigma)
Inverse ( ) of the cumulative distribution function of the lower tail of the normal (Gaussian) distri...
Double_t QuietNaN()
Returns a quiet NaN as defined by IEEE 754
void Quantiles(Int_t n, Int_t nprob, Double_t *x, Double_t *quantiles, Double_t *prob, Bool_t isSorted=kTRUE, Int_t *index=0, Int_t type=7)
Computes sample quantiles, corresponding to the given probabilities.