Logo ROOT   6.08/07
Reference Guide
TKDE.h
Go to the documentation of this file.
1 // @(#)root/hist:$Id$
2 // Authors: Bartolomeu Rabacal 07/2010
3 /**********************************************************************
4  * *
5  * Copyright (c) 2006 , LCG ROOT MathLib Team *
6  * *
7  * *
8  **********************************************************************/
9 // Header file for TKDE
10 
11 #ifndef ROOT_TKDE
12 #define ROOT_TKDE
13 
14 #ifndef ROOT_Math_WrappedFunction
15  #include "Math/WrappedFunction.h"
16 #endif
17 
18 #ifndef ROOT_TNamed
19  #include "TNamed.h"
20 #endif
21 
22 #ifndef ROOT_Math_Math
23 #include "Math/Math.h"
24 #endif
25 
26 //#include "TF1.h"
27 class TGraphErrors;
28 class TF1;
29 
30 /*
31  Kernel Density Estimation class. The three main references are (1) "Scott DW, Multivariate Density Estimation.
32 Theory, Practice and Visualization. New York: Wiley", (2) "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS: Stata module for univariate kernel density estimation." and (3) "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."
33  The algorithm is briefly described in (4) "Cranmer KS, Kernel Estimation in High-Energy
34 Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep-ex/0011057.
35  A binned version is also implemented to address the performance issue due to its data size dependence.
36 */
37 class TKDE : public TNamed {
38 public:
39 
40  enum EKernelType { // Kernel function type option
45  kUserDefined, // Internal use only for the class's template constructor
46  kTotalKernels // Internal use only for member initialization
47  };
48 
49  enum EIteration { // KDE fitting option
52  };
53 
54  enum EMirror { // Data "mirroring" option to address the probability "spill out" boundary effect
64  };
65 
66  enum EBinning{ // Data binning option
68  kRelaxedBinning, // The algorithm is allowed to use binning if the data is large enough
70  };
71 
72  explicit TKDE(UInt_t events = 0, const Double_t* data = 0, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
73  "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
74  Instantiate( nullptr, events, data, nullptr, xMin, xMax, option, rho);
75  }
76 
77  TKDE(UInt_t events, const Double_t* data, const Double_t* dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
78  "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
79  Instantiate( nullptr, events, data, dataWeight, xMin, xMax, option, rho);
80  }
81 
82  template<class KernelFunction>
83  TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
84  Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, nullptr, xMin, xMax, option, rho);
85  }
86  template<class KernelFunction>
87  TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, const Double_t * dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
88  Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, dataWeight, xMin, xMax, option, rho);
89  }
90 
91  virtual ~TKDE();
92 
93  void Fill(Double_t data);
94  void Fill(Double_t data, Double_t weight);
95  void SetKernelType(EKernelType kern);
96  void SetIteration(EIteration iter);
97  void SetMirror(EMirror mir);
98  void SetBinning(EBinning);
99  void SetNBins(UInt_t nbins);
101  void SetTuneFactor(Double_t rho);
102  void SetRange(Double_t xMin, Double_t xMax); // By default computed from the data
103 
104  virtual void Draw(const Option_t* option = "");
105 
106  Double_t operator()(Double_t x) const;
107  Double_t operator()(const Double_t* x, const Double_t* p=0) const; // Needed for creating TF1
108 
109  Double_t GetValue(Double_t x) const { return (*this)(x); }
110  Double_t GetError(Double_t x) const;
111 
112  Double_t GetBias(Double_t x) const;
113  Double_t GetMean() const;
114  Double_t GetSigma() const;
115  Double_t GetRAMISE() const;
116 
117  Double_t GetFixedWeight() const;
118 
119  TF1* GetFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
120  TF1* GetUpperFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
121  TF1* GetLowerFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
122  TF1* GetApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
123  TGraphErrors * GetGraphWithErrors(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
124 
125  // get the drawn object to change settings
126  // These objects are managed by TKDE and should not be deleted by the user
127  TF1 * GetDrawnFunction() { return fPDF;}
131 
132  const Double_t * GetAdaptiveWeights() const;
133 
134 
135 private:
136 
137  TKDE(TKDE& kde); // Disallowed copy constructor
138  TKDE operator=(TKDE& kde); // Disallowed assign operator
139 
141  KernelFunction_Ptr fKernelFunction;
142 
143  class TKernel;
144  friend class TKernel;
145 
147 
148  std::vector<Double_t> fData; // Data events
149  std::vector<Double_t> fEvents; // Original data storage
150  std::vector<Double_t> fEventWeights; // Original data weights
151 
152  TF1* fPDF; // Output Kernel Density Estimation PDF function
153  TF1* fUpperPDF; // Output Kernel Density Estimation upper confidence interval PDF function
154  TF1* fLowerPDF; // Output Kernel Density Estimation lower confidence interval PDF function
155  TF1* fApproximateBias; // Output Kernel Density Estimation approximate bias
156  TGraphErrors* fGraph; // Graph with the errors
157 
162 
165  Bool_t fNewData; // flag to control when new data are given
166  Bool_t fUseMinMaxFromData; // flag to control whether min and max must be taken from the data
167 
168  UInt_t fNBins; // Number of bins for binned data option
169  UInt_t fNEvents; // Data's number of events
170  Double_t fSumOfCounts; // Data sum of weights
171  UInt_t fUseBinsNEvents; // If the algorithm is allowed to use binning this is the minimum number of events to do so
172 
173  Double_t fMean; // Data mean
174  Double_t fSigma; // Data std deviation
175  Double_t fSigmaRob; // Data std deviation (robust estimation)
176  Double_t fXMin; // Data minimum value
177  Double_t fXMax; // Data maximum value
178  Double_t fRho; // Adjustment factor for sigma
179  Double_t fAdaptiveBandwidthFactor; // Geometric mean of the kernel density estimation from the data for adaptive iteration
180 
181  Double_t fWeightSize; // Caches the weight size
182 
183  std::vector<Double_t> fCanonicalBandwidths;
184  std::vector<Double_t> fKernelSigmas2;
185 
186  std::vector<Double_t> fBinCount; // Number of events per bin for binned data option
187 
188  std::vector<Bool_t> fSettedOptions; // User input options flag
189 
191  friend struct KernelIntegrand;
192 
193  void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data, const Double_t* weight,
194  Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho);
195 
196  inline Double_t GaussianKernel(Double_t x) const {
197  // Returns the kernel evaluation at x
198  Double_t k2_PI_ROOT_INV = 0.398942280401432703; // (2 * M_PI)**-0.5
199  return (x > -9. && x < 9.) ? k2_PI_ROOT_INV * std::exp(-.5 * x * x) : 0.0;
200  }
202  return (x > -1. && x < 1.) ? 3. / 4. * (1. - x * x) : 0.0;
203  }
204  inline Double_t BiweightKernel(Double_t x) const {
205  // Returns the kernel evaluation at x
206  return (x > -1. && x < 1.) ? 15. / 16. * (1. - x * x) * (1. - x * x) : 0.0;
207  }
209  // Returns the kernel evaluation at x
210  return (x > -1. && x < 1.) ? M_PI_4 * std::cos(M_PI_2 * x) : 0.0;
211  }
212  Double_t UpperConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
213  Double_t LowerConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
214  Double_t ApproximateBias(const Double_t* x, const Double_t* ) const { return GetBias(*x); }
217  Double_t ComputeKernelMu() const;
220  void ComputeDataStats() ;
221 
222  UInt_t Index(Double_t x) const;
223 
225  void SetBinCountData();
226  void CheckKernelValidity();
228  void SetUserKernelSigma2();
229  void SetCanonicalBandwidths();
230  void SetKernelSigmas2();
231  void SetHistogram();
232  void SetUseBins();
233  void SetMirror();
234  void SetMean();
235  void SetSigma(Double_t R);
236  void SetKernel();
237  void SetKernelFunction(KernelFunction_Ptr kernfunc = 0);
238  void SetOptions(const Option_t* option, Double_t rho);
239  void CheckOptions(Bool_t isUserDefinedKernel = kFALSE);
240  void GetOptions(std::string optionType, std::string option);
241  void AssureOptions();
242  void SetData(const Double_t* data, const Double_t * weights);
243  void InitFromNewData();
244  void SetMirroredEvents();
245  void SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt);
246  void DrawErrors(TString& drawOpt);
247  void DrawConfidenceInterval(TString& drawOpt, double cl=0.95);
248 
249  TF1* GetKDEFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
250  TF1* GetKDEApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
251  // The density to estimate should be at least twice differentiable.
252  TF1* GetPDFUpperConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
253  TF1* GetPDFLowerConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
254 
255  ClassDef(TKDE, 2) // One dimensional semi-parametric Kernel Density Estimation
256 
257 };
258 
259 #endif
Double_t EpanechnikovKernel(Double_t x) const
Definition: TKDE.h:201
Double_t ComputeKernelSigma2() const
Definition: TKDE.cxx:1029
Double_t LowerConfidenceInterval(const Double_t *x, const Double_t *p) const
Definition: TKDE.cxx:968
void SetMean()
Definition: TKDE.cxx:531
Bool_t fUseMinMaxFromData
Definition: TKDE.h:166
TF1 * GetDrawnLowerFunction()
Definition: TKDE.h:129
Bool_t fUseMirroring
Definition: TKDE.h:163
float xmin
Definition: THbookFile.cxx:93
void SetUserKernelSigma2()
Definition: TKDE.cxx:1101
Interface (abstract class) for generic functions objects of one-dimension Provides a method to evalua...
Definition: IFunction.h:133
TF1 * GetFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:617
friend struct KernelIntegrand
Definition: TKDE.h:190
Kernel Density Estimation class.
Definition: TKDE.h:37
void SetRange(Double_t xMin, Double_t xMax)
Definition: TKDE.cxx:404
void SetKernel()
Definition: TKDE.cxx:542
const Double_t * GetAdaptiveWeights() const
Definition: TKDE.cxx:885
Bool_t fAsymRight
Definition: TKDE.h:163
EKernelType
Definition: TKDE.h:40
const char Option_t
Definition: RtypesCore.h:62
TF1 * GetLowerFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:631
void SetKernelSigmas2()
Definition: TKDE.cxx:609
void InitFromNewData()
Definition: TKDE.cxx:480
void GetOptions(std::string optionType, std::string option)
Definition: TKDE.cxx:223
Double_t ApproximateBias(const Double_t *x, const Double_t *) const
Definition: TKDE.h:214
UInt_t fNEvents
Definition: TKDE.h:169
void SetIteration(EIteration iter)
Definition: TKDE.cxx:340
Basic string class.
Definition: TString.h:137
EMirror fMirror
Definition: TKDE.h:160
void ComputeDataStats()
Definition: TKDE.cxx:1056
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
std::vector< Double_t > fKernelSigmas2
Definition: TKDE.h:184
int nbins[3]
Bool_t fMirrorRight
Definition: TKDE.h:163
virtual void Draw(const Option_t *option="")
Definition: TKDE.cxx:777
TF1 * fPDF
Definition: TKDE.h:152
ROOT::Math::IBaseFunctionOneDim * KernelFunction_Ptr
Definition: TKDE.h:140
Double_t BiweightKernel(Double_t x) const
Definition: TKDE.h:204
Double_t fRho
Definition: TKDE.h:178
double cos(double)
Double_t GetFixedWeight() const
Definition: TKDE.cxx:874
TF1 * GetPDFLowerConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1152
void SetKernelType(EKernelType kern)
Definition: TKDE.cxx:329
std::vector< Double_t > fCanonicalBandwidths
Definition: TKDE.h:183
Double_t operator()(Double_t x) const
Definition: TKDE.cxx:669
TKDE(UInt_t events=0, const Double_t *data=0, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:72
Template class to wrap any C++ callable object which takes one argument i.e.
TF1 * GetDrawnUpperFunction()
Definition: TKDE.h:128
Double_t GetError(Double_t x) const
Definition: TKDE.cxx:987
TF1 * GetApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:636
void SetCanonicalBandwidths()
Definition: TKDE.cxx:600
TF1 * fUpperPDF
Definition: TKDE.h:153
UInt_t Index(Double_t x) const
Definition: TKDE.cxx:944
Double_t x[n]
Definition: legend1.C:17
TGraphErrors * GetGraphWithErrors(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:835
std::vector< Double_t > fBinCount
Definition: TKDE.h:186
#define ClassDef(name, id)
Definition: Rtypes.h:254
TF1 * fApproximateBias
Definition: TKDE.h:155
TF1 * GetUpperFunction(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:626
The TNamed class is the base class for all named ROOT classes.
Definition: TNamed.h:33
TKDE(UInt_t events, const Double_t *data, const Double_t *dataWeight, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:77
EBinning
Definition: TKDE.h:66
void SetKernelFunction(KernelFunction_Ptr kernfunc=0)
Definition: TKDE.cxx:556
#define M_PI_2
Definition: Math.h:42
EKernelType fKernelType
Definition: TKDE.h:158
friend class TKernel
Definition: TKDE.h:143
EMirror
Definition: TKDE.h:54
std::vector< Double_t > fData
Definition: TKDE.h:148
void SetUseBinsNEvents(UInt_t nEvents)
Definition: TKDE.cxx:387
TGraphErrors * fGraph
Definition: TKDE.h:156
TGraphErrors * GetDrawnGraph()
Definition: TKDE.h:130
TKDE operator=(TKDE &kde)
void SetMirror()
Definition: TKDE.cxx:437
TF1 * GetKDEFunction(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1125
const int nEvents
Definition: testRooFit.cxx:42
void Fill(Double_t data)
Definition: TKDE.cxx:641
TKernel * fKernel
Definition: TKDE.h:146
void CheckKernelValidity()
Definition: TKDE.cxx:996
Double_t ComputeKernelMu() const
Definition: TKDE.cxx:1038
Double_t fXMin
Definition: TKDE.h:176
Double_t GetRAMISE() const
Definition: TKDE.cxx:687
virtual ~TKDE()
Definition: TKDE.cxx:105
void SetOptions(const Option_t *option, Double_t rho)
Definition: TKDE.cxx:153
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fSumOfCounts
Definition: TKDE.h:170
std::vector< Double_t > fEventWeights
Definition: TKDE.h:150
Double_t GetValue(Double_t x) const
Definition: TKDE.h:109
float xmax
Definition: THbookFile.cxx:93
Double_t ComputeKernelIntegral() const
Definition: TKDE.cxx:1047
TKDE(const Char_t *, const KernelFunction &kernfunc, UInt_t events, const Double_t *data, const Double_t *dataWeight, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:87
void SetSigma(Double_t R)
Definition: TKDE.cxx:536
Bool_t fMirrorLeft
Definition: TKDE.h:163
Double_t fWeightSize
Definition: TKDE.h:181
Bool_t fAsymLeft
Definition: TKDE.h:163
Bool_t fNewData
Definition: TKDE.h:165
TF1 * fLowerPDF
Definition: TKDE.h:154
void SetNBins(UInt_t nbins)
Definition: TKDE.cxx:368
#define M_PI_4
Definition: Math.h:46
void SetHistogram()
Double_t fMean
Definition: TKDE.h:173
std::vector< Bool_t > fSettedOptions
Definition: TKDE.h:188
double Double_t
Definition: RtypesCore.h:55
EIteration fIteration
Definition: TKDE.h:159
void SetBinning(EBinning)
Definition: TKDE.cxx:360
Bool_t fUseBins
Definition: TKDE.h:164
std::vector< Double_t > fEvents
Definition: TKDE.h:149
Double_t fSigma
Definition: TKDE.h:174
TF1 * GetPDFUpperConfidenceInterval(Double_t confidenceLevel=0.95, UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1139
KernelFunction_Ptr fKernelFunction
Definition: TKDE.h:141
void DrawErrors(TString &drawOpt)
Definition: TKDE.cxx:828
Double_t ComputeKernelL2Norm() const
Definition: TKDE.cxx:1020
Double_t GetSigma() const
Definition: TKDE.cxx:681
EIteration
Definition: TKDE.h:49
UInt_t fUseBinsNEvents
Definition: TKDE.h:171
UInt_t fNBins
Definition: TKDE.h:168
void SetUseBins()
Definition: TKDE.cxx:418
Double_t fSigmaRob
Definition: TKDE.h:175
void SetBinCountData()
Definition: TKDE.cxx:739
char Char_t
Definition: RtypesCore.h:29
void CheckOptions(Bool_t isUserDefinedKernel=kFALSE)
Definition: TKDE.cxx:306
EBinning fBinning
Definition: TKDE.h:161
void AssureOptions()
Definition: TKDE.cxx:290
TKDE(const Char_t *, const KernelFunction &kernfunc, UInt_t events, const Double_t *data, Double_t xMin=0.0, Double_t xMax=0.0, const Option_t *option="KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho=1.0)
Definition: TKDE.h:83
Double_t GetMean() const
Definition: TKDE.cxx:675
void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t *data, const Double_t *weight, Double_t xMin, Double_t xMax, const Option_t *option, Double_t rho)
Definition: TKDE.cxx:116
TF1 * GetDrawnFunction()
Definition: TKDE.h:127
Double_t fAdaptiveBandwidthFactor
Definition: TKDE.h:179
void SetMirroredEvents()
Definition: TKDE.cxx:500
void SetUserCanonicalBandwidth()
Definition: TKDE.cxx:1096
1-Dim function class
Definition: TF1.h:149
TF1 * GetKDEApproximateBias(UInt_t npx=100, Double_t xMin=1.0, Double_t xMax=0.0)
Definition: TKDE.cxx:1165
A TGraphErrors is a TGraph with error bars.
Definition: TGraphErrors.h:28
void SetTuneFactor(Double_t rho)
Definition: TKDE.cxx:394
Double_t CosineArchKernel(Double_t x) const
Definition: TKDE.h:208
void SetData(const Double_t *data, const Double_t *weights)
Definition: TKDE.cxx:446
Double_t UpperConfidenceInterval(const Double_t *x, const Double_t *p) const
Definition: TKDE.cxx:959
void DrawConfidenceInterval(TString &drawOpt, double cl=0.95)
Definition: TKDE.cxx:859
Double_t GaussianKernel(Double_t x) const
Definition: TKDE.h:196
double exp(double)
void SetBinCentreData(Double_t xmin, Double_t xmax)
Definition: TKDE.cxx:730
TRandom3 R
a TMatrixD.
Definition: testIO.cxx:28
Double_t fXMax
Definition: TKDE.h:177
Double_t ComputeMidspread()
Definition: TKDE.cxx:1085
void SetDrawOptions(const Option_t *option, TString &plotOpt, TString &drawOpt)
Definition: TKDE.cxx:179
Double_t GetBias(Double_t x) const
Definition: TKDE.cxx:978