Logo ROOT   6.10/09
Reference Guide
TKDE.h
Go to the documentation of this file.
1 // @(#)root/hist:$Id$
2 // Authors: Bartolomeu Rabacal 07/2010
3 /**********************************************************************
4  * *
5  * Copyright (c) 2006 , LCG ROOT MathLib Team *
6  * *
7  * *
8  **********************************************************************/
9 // Header file for TKDE
10 
11 #ifndef ROOT_TKDE
12 #define ROOT_TKDE
13 
14 #include "Math/WrappedFunction.h"
15 
16 #include "TNamed.h"
17 
18 #include "Math/Math.h"
19 
20 //#include "TF1.h"
21 class TGraphErrors;
22 class TF1;
23 
24 /*
25  Kernel Density Estimation class. The three main references are (1) "Scott DW, Multivariate Density Estimation.
26 Theory, Practice and Visualization. New York: Wiley", (2) "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS: Stata module for univariate kernel density estimation." and (3) "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."
27  The algorithm is briefly described in (4) "Cranmer KS, Kernel Estimation in High-Energy
28 Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep ex/0011057.
29  A binned version is also implemented to address the performance issue due to its data size dependence.
30 */
31 class TKDE : public TNamed {
32 public:
33 
34  enum EKernelType { // Kernel function type option
39  kUserDefined, // Internal use only for the class's template constructor
40  kTotalKernels // Internal use only for member initialization
41  };
42 
43  enum EIteration { // KDE fitting option
46  };
47 
48  enum EMirror { // Data "mirroring" option to address the probability "spill out" boundary effect
58  };
59 
60  enum EBinning{ // Data binning option
62  kRelaxedBinning, // The algorithm is allowed to use binning if the data is large enough
64  };
65 
66  explicit TKDE(UInt_t events = 0, const Double_t* data = 0, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
67  "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
68  Instantiate( nullptr, events, data, nullptr, xMin, xMax, option, rho);
69  }
70 
71  TKDE(UInt_t events, const Double_t* data, const Double_t* dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
72  "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
73  Instantiate( nullptr, events, data, dataWeight, xMin, xMax, option, rho);
74  }
75 
76  template<class KernelFunction>
77  TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
78  Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, nullptr, xMin, xMax, option, rho);
79  }
80  template<class KernelFunction>
81  TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, const Double_t * dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
82  Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, dataWeight, xMin, xMax, option, rho);
83  }
84 
85  virtual ~TKDE();
86 
87  void Fill(Double_t data);
88  void Fill(Double_t data, Double_t weight);
89  void SetKernelType(EKernelType kern);
90  void SetIteration(EIteration iter);
91  void SetMirror(EMirror mir);
92  void SetBinning(EBinning);
93  void SetNBins(UInt_t nbins);
95  void SetTuneFactor(Double_t rho);
96  void SetRange(Double_t xMin, Double_t xMax); // By default computed from the data
97 
98  virtual void Draw(const Option_t* option = "");
99 
100  Double_t operator()(Double_t x) const;
101  Double_t operator()(const Double_t* x, const Double_t* p=0) const; // Needed for creating TF1
102 
103  Double_t GetValue(Double_t x) const { return (*this)(x); }
104  Double_t GetError(Double_t x) const;
105 
106  Double_t GetBias(Double_t x) const;
107  Double_t GetMean() const;
108  Double_t GetSigma() const;
109  Double_t GetRAMISE() const;
110 
111  Double_t GetFixedWeight() const;
112 
113  TF1* GetFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
114  TF1* GetUpperFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
115  TF1* GetLowerFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
116  TF1* GetApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
117  TGraphErrors * GetGraphWithErrors(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
118 
119  // get the drawn object to chanage settings
120  // These objects are managed by TKDE and should not be deleted by the user
121  TF1 * GetDrawnFunction() { return fPDF;}
125 
126  const Double_t * GetAdaptiveWeights() const;
127 
128 
129 private:
130 
131  TKDE(TKDE& kde); // Disallowed copy constructor
132  TKDE operator=(TKDE& kde); // Disallowed assign operator
133 
135  KernelFunction_Ptr fKernelFunction;
136 
137  class TKernel;
138  friend class TKernel;
139 
141 
142  std::vector<Double_t> fData; // Data events
143  std::vector<Double_t> fEvents; // Original data storage
144  std::vector<Double_t> fEventWeights; // Original data weights
145 
146  TF1* fPDF; // Output Kernel Density Estimation PDF function
147  TF1* fUpperPDF; // Output Kernel Density Estimation upper confidence interval PDF function
148  TF1* fLowerPDF; // Output Kernel Density Estimation lower confidence interval PDF function
149  TF1* fApproximateBias; // Output Kernel Density Estimation approximate bias
150  TGraphErrors* fGraph; // Graph with the errors
151 
156 
159  Bool_t fNewData; // flag to control when new data are given
160  Bool_t fUseMinMaxFromData; // flag top control if min and max must be used from data
161 
162  UInt_t fNBins; // Number of bins for binned data option
163  UInt_t fNEvents; // Data's number of events
164  Double_t fSumOfCounts; // Data sum of weights
165  UInt_t fUseBinsNEvents; // If the algorithm is allowed to use binning this is the minimum number of events to do so
166 
167  Double_t fMean; // Data mean
168  Double_t fSigma; // Data std deviation
169  Double_t fSigmaRob; // Data std deviation (robust estimation)
170  Double_t fXMin; // Data minimum value
171  Double_t fXMax; // Data maximum value
172  Double_t fRho; // Adjustment factor for sigma
173  Double_t fAdaptiveBandwidthFactor; // Geometric mean of the kernel density estimation from the data for adaptive iteration
174 
175  Double_t fWeightSize; // Caches the weight size
176 
177  std::vector<Double_t> fCanonicalBandwidths;
178  std::vector<Double_t> fKernelSigmas2;
179 
180  std::vector<Double_t> fBinCount; // Number of events per bin for binned data option
181 
182  std::vector<Bool_t> fSettedOptions; // User input options flag
183 
185  friend struct KernelIntegrand;
186 
187  void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data, const Double_t* weight,
188  Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho);
189 
190  inline Double_t GaussianKernel(Double_t x) const {
191  // Returns the kernel evaluation at x
192  Double_t k2_PI_ROOT_INV = 0.398942280401432703; // (2 * M_PI)**-0.5
193  return (x > -9. && x < 9.) ? k2_PI_ROOT_INV * std::exp(-.5 * x * x) : 0.0;
194  }
196  return (x > -1. && x < 1.) ? 3. / 4. * (1. - x * x) : 0.0;
197  }
198  inline Double_t BiweightKernel(Double_t x) const {
199  // Returns the kernel evaluation at x
200  return (x > -1. && x < 1.) ? 15. / 16. * (1. - x * x) * (1. - x * x) : 0.0;
201  }
203  // Returns the kernel evaluation at x
204  return (x > -1. && x < 1.) ? M_PI_4 * std::cos(M_PI_2 * x) : 0.0;
205  }
206  Double_t UpperConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
207  Double_t LowerConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
208  Double_t ApproximateBias(const Double_t* x, const Double_t* ) const { return GetBias(*x); }
211  Double_t ComputeKernelMu() const;
214  void ComputeDataStats() ;
215 
216  UInt_t Index(Double_t x) const;
217 
219  void SetBinCountData();
220  void CheckKernelValidity();
222  void SetUserKernelSigma2();
223  void SetCanonicalBandwidths();
224  void SetKernelSigmas2();
225  void SetHistogram();
226  void SetUseBins();
227  void SetMirror();
228  void SetMean();
229  void SetSigma(Double_t R);
230  void SetKernel();
231  void SetKernelFunction(KernelFunction_Ptr kernfunc = 0);
232  void SetOptions(const Option_t* option, Double_t rho);
233  void CheckOptions(Bool_t isUserDefinedKernel = kFALSE);
234  void GetOptions(std::string optionType, std::string option);
235  void AssureOptions();
236  void SetData(const Double_t* data, const Double_t * weights);
237  void InitFromNewData();
238  void SetMirroredEvents();
239  void SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt);
240  void DrawErrors(TString& drawOpt);
241  void DrawConfidenceInterval(TString& drawOpt, double cl=0.95);
242 
243  TF1* GetKDEFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
244  TF1* GetKDEApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
245  // The density to estimate should be at least twice differentiable.
246  TF1* GetPDFUpperConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
247  TF1* GetPDFLowerConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
248 
249  ClassDef(TKDE, 2) // One dimensional semi-parametric Kernel Density Estimation
250 
251 };
252 
253 #endif
Double_t EpanechnikovKernel(Double_t x) const
Definition: TKDE.h:195
Double_t ComputeKernelSigma2() const
Definition: TKDE.cxx:1029
Double_t LowerConfidenceInterval(const Double_t *x, const Double_t *p) const
Definition: TKDE.cxx:968
void SetMean()
Definition: TKDE.cxx:531
Bool_t fUseMinMaxFromData
Definition: TKDE.h:160
TF1 * GetDrawnLowerFunction()
Definition: TKDE.h:123
Bool_t fUseMirroring
Definition: TKDE.h:157
float xmin
Definition: THbookFile.cxx:93
void SetUserKernelSigma2()
Definition: TKDE.cxx:1101
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition: IFunction.h:134
TF1 * GetFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:617
friend struct KernelIntegrand
Definition: TKDE.h:184
Kernel Density Estimation class.
Definition: TKDE.h:31
void SetRange(Double_t xMin, Double_t xMax)
Definition: TKDE.cxx:404
void SetKernel()
Definition: TKDE.cxx:542
const Double_t * GetAdaptiveWeights() const
Definition: TKDE.cxx:885
Bool_t fAsymRight
Definition: TKDE.h:157
EKernelType
Definition: TKDE.h:34
const char Option_t
Definition: RtypesCore.h:62
TF1 * GetLowerFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:631
void SetKernelSigmas2()
Definition: TKDE.cxx:609
void InitFromNewData()
Definition: TKDE.cxx:480
void GetOptions(std::string optionType, std::string option)
Definition: TKDE.cxx:223
Double_t ApproximateBias(const Double_t *x, const Double_t *) const
Definition: TKDE.h:208
UInt_t fNEvents
Definition: TKDE.h:163
void SetIteration(EIteration iter)
Definition: TKDE.cxx:340
Basic string class.
Definition: TString.h:129
EMirror fMirror
Definition: TKDE.h:154
void ComputeDataStats()
Definition: TKDE.cxx:1056
bool Bool_t
Definition: RtypesCore.h:59
std::vector< Double_t > fKernelSigmas2
Definition: TKDE.h:178
int nbins[3]
Bool_t fMirrorRight
Definition: TKDE.h:157
virtual void Draw(const Option_t *option="")
Definition: TKDE.cxx:777
TF1 * fPDF
Definition: TKDE.h:146
ROOT::Math::IBaseFunctionOneDim * KernelFunction_Ptr
Definition: TKDE.h:134
Double_t BiweightKernel(Double_t x) const
Definition: TKDE.h:198
Double_t fRho
Definition: TKDE.h:172
double cos(double)
Double_t GetFixedWeight() const
Definition: TKDE.cxx:874
TF1 * GetPDFLowerConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1152
void SetKernelType(EKernelType kern)
Definition: TKDE.cxx:329
std::vector< Double_t > fCanonicalBandwidths
Definition: TKDE.h:177
Double_t operator()(Double_t x) const
Definition: TKDE.cxx:669
TKDE(UInt_t events=0, const Double_t *data=0, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:66
Template class to wrap any C++ callable object which takes one argument i.e.
TF1 * GetDrawnUpperFunction()
Definition: TKDE.h:122
Double_t GetError(Double_t x) const
Definition: TKDE.cxx:987
TF1 * GetApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:636
void SetCanonicalBandwidths()
Definition: TKDE.cxx:600
TF1 * fUpperPDF
Definition: TKDE.h:147
UInt_t Index(Double_t x) const
Definition: TKDE.cxx:944
Double_t x[n]
Definition: legend1.C:17
TGraphErrors * GetGraphWithErrors(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:835
std::vector< Double_t > fBinCount
Definition: TKDE.h:180
#define ClassDef(name, id)
Definition: Rtypes.h:297
TF1 * fApproximateBias
Definition: TKDE.h:149
TF1 * GetUpperFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:626
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:29
TKDE(UInt_t events, const Double_t *data, const Double_t *dataWeight, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:71
EBinning
Definition: TKDE.h:60
void SetKernelFunction(KernelFunction_Ptr kernfunc=0)
Definition: TKDE.cxx:556
#define M_PI_2
Definition: Math.h:42
EKernelType fKernelType
Definition: TKDE.h:152
friend class TKernel
Definition: TKDE.h:137
EMirror
Definition: TKDE.h:48
std::vector< Double_t > fData
Definition: TKDE.h:142
void SetUseBinsNEvents(UInt_t nEvents)
Definition: TKDE.cxx:387
TGraphErrors * fGraph
Definition: TKDE.h:150
TGraphErrors * GetDrawnGraph()
Definition: TKDE.h:124
TKDE operator=(TKDE &kde)
void SetMirror()
Definition: TKDE.cxx:437
TF1 * GetKDEFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1125
const int nEvents
Definition: testRooFit.cxx:42
void Fill(Double_t data)
Definition: TKDE.cxx:641
TKernel * fKernel
Definition: TKDE.h:140
void CheckKernelValidity()
Definition: TKDE.cxx:996
Double_t ComputeKernelMu() const
Definition: TKDE.cxx:1038
Double_t fXMin
Definition: TKDE.h:170
Double_t GetRAMISE() const
Definition: TKDE.cxx:687
virtual ~TKDE()
Definition: TKDE.cxx:105
void SetOptions(const Option_t *option, Double_t rho)
Definition: TKDE.cxx:153
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fSumOfCounts
Definition: TKDE.h:164
std::vector< Double_t > fEventWeights
Definition: TKDE.h:144
Double_t GetValue(Double_t x) const
Definition: TKDE.h:103
float xmax
Definition: THbookFile.cxx:93
Double_t ComputeKernelIntegral() const
Definition: TKDE.cxx:1047
TKDE(const Char_t *, const KernelFunction &kernfunc, UInt_t events, const Double_t *data, const Double_t *dataWeight, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:81
void SetSigma(Double_t R)
Definition: TKDE.cxx:536
Bool_t fMirrorLeft
Definition: TKDE.h:157
Double_t fWeightSize
Definition: TKDE.h:175
Bool_t fAsymLeft
Definition: TKDE.h:157
Bool_t fNewData
Definition: TKDE.h:159
const Bool_t kFALSE
Definition: RtypesCore.h:92
TF1 * fLowerPDF
Definition: TKDE.h:148
void SetNBins(UInt_t nbins)
Definition: TKDE.cxx:368
#define M_PI_4
Definition: Math.h:46
void SetHistogram()
Double_t fMean
Definition: TKDE.h:167
std::vector< Bool_t > fSettedOptions
Definition: TKDE.h:182
double Double_t
Definition: RtypesCore.h:55
EIteration fIteration
Definition: TKDE.h:153
void SetBinning(EBinning)
Definition: TKDE.cxx:360
Bool_t fUseBins
Definition: TKDE.h:158
std::vector< Double_t > fEvents
Definition: TKDE.h:143
Double_t fSigma
Definition: TKDE.h:168
TF1 * GetPDFUpperConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1139
KernelFunction_Ptr fKernelFunction
Definition: TKDE.h:135
void DrawErrors(TString &drawOpt)
Definition: TKDE.cxx:828
Double_t ComputeKernelL2Norm() const
Definition: TKDE.cxx:1020
Double_t GetSigma() const
Definition: TKDE.cxx:681
EIteration
Definition: TKDE.h:43
UInt_t fUseBinsNEvents
Definition: TKDE.h:165
UInt_t fNBins
Definition: TKDE.h:162
void SetUseBins()
Definition: TKDE.cxx:418
Double_t fSigmaRob
Definition: TKDE.h:169
void SetBinCountData()
Definition: TKDE.cxx:739
char Char_t
Definition: RtypesCore.h:29
void CheckOptions(Bool_t isUserDefinedKernel=kFALSE)
Definition: TKDE.cxx:306
EBinning fBinning
Definition: TKDE.h:155
void AssureOptions()
Definition: TKDE.cxx:290
TKDE(const Char_t *, const KernelFunction &kernfunc, UInt_t events, const Double_t *data, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:77
Double_t GetMean() const
Definition: TKDE.cxx:675
void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t *data, const Double_t *weight, Double_t xMin, Double_t xMax, const Option_t *option, Double_t rho)
Definition: TKDE.cxx:116
TF1 * GetDrawnFunction()
Definition: TKDE.h:121
Double_t fAdaptiveBandwidthFactor
Definition: TKDE.h:173
void SetMirroredEvents()
Definition: TKDE.cxx:500
void SetUserCanonicalBandwidth()
Definition: TKDE.cxx:1096
1-Dim function class
Definition: TF1.h:150
TF1 * GetKDEApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1165
A TGraphErrors is a TGraph with error bars.
Definition: TGraphErrors.h:26
void SetTuneFactor(Double_t rho)
Definition: TKDE.cxx:394
Double_t CosineArchKernel(Double_t x) const
Definition: TKDE.h:202
void SetData(const Double_t *data, const Double_t *weights)
Definition: TKDE.cxx:446
Double_t UpperConfidenceInterval(const Double_t *x, const Double_t *p) const
Definition: TKDE.cxx:959
void DrawConfidenceInterval(TString &drawOpt, double cl=0.95)
Definition: TKDE.cxx:859
Double_t GaussianKernel(Double_t x) const
Definition: TKDE.h:190
double exp(double)
void SetBinCentreData(Double_t xmin, Double_t xmax)
Definition: TKDE.cxx:730
TRandom3 R
a TMatrixD.
Definition: testIO.cxx:28
Double_t fXMax
Definition: TKDE.h:171
Double_t ComputeMidspread()
Definition: TKDE.cxx:1085
void SetDrawOptions(const Option_t *option, TString &plotOpt, TString &drawOpt)
Definition: TKDE.cxx:179
Double_t GetBias(Double_t x) const
Definition: TKDE.cxx:978