ROOT logo
// @(#)root/hist:$Id: TKDE.h 37543 2010-12-11 14:14:05Z moneta $
// Authors: Bartolomeu Rabacal    07/2010
/**********************************************************************
 *                                                                    *
 * Copyright (c) 2006 , LCG ROOT MathLib Team                         *
 *                                                                    *
 *                                                                    *
 **********************************************************************/
// Header file for TKDE

#ifndef ROOT_TKDE
#define ROOT_TKDE

#ifndef ROOT_Math_WrappedFunction
   #include "Math/WrappedFunction.h"
#endif

#ifndef ROOT_TNamed
   #include "TNamed.h"
#endif

#ifndef ROOT_Math_Math
#include "Math/Math.h"
#endif

//#include "TF1.h"
class TGraphErrors;
class TF1;

/*
   Kernel Density Estimation class. The three main references are (1) "Scott DW, Multivariate Density Estimation.
Theory, Practice and Visualization. New York: Wiley", (2) "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS: Stata module for univariate kernel density estimation." and (3) "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."
   The algorithm is briefly described in (4) "Cranmer KS, Kernel Estimation in High-Energy
Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep ex/0011057.
   A binned version is also implemented to address the performance issue due to its data size dependence.
*/
class TKDE : public TNamed  {
public:

   enum EKernelType { // Kernel function type option
      kGaussian,
      kEpanechnikov,
      kBiweight,
      kCosineArch,
      kUserDefined, // Internal use only for the class's template constructor
      kTotalKernels // Internal use only for member initialization
   };

   enum EIteration { // KDE fitting option
      kAdaptive,
      kFixed
   };

   enum EMirror { // Data "mirroring" option to address the probability "spill out" boundary effect
      kNoMirror,
      kMirrorLeft,
      kMirrorRight,
      kMirrorBoth,
      kMirrorAsymLeft,
      kMirrorAsymLeftRight,
      kMirrorAsymRight,
      kMirrorLeftAsymRight,
      kMirrorAsymBoth
   };

   enum EBinning{ // Data binning option
      kUnbinned,
      kRelaxedBinning, // The algorithm is allowed to use binning if the data is large enough
      kForcedBinning
   };

   explicit TKDE(UInt_t events = 0, const Double_t* data = 0, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0);

   template<class KernelFunction>
   TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0)  {
      Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, xMin, xMax, option, rho);
   }

   virtual ~TKDE();

   void Fill(Double_t data);
   void SetKernelType(EKernelType kern);
   void SetIteration(EIteration iter);
   void SetMirror(EMirror mir);
   void SetBinning(EBinning);
   void SetNBins(UInt_t nbins);
   void SetUseBinsNEvents(UInt_t nEvents);
   void SetTuneFactor(Double_t rho);
   void SetRange(Double_t xMin, Double_t xMax); // By default computed from the data

   virtual void Draw(const Option_t* option = "");

   Double_t operator()(Double_t x) const;
   Double_t operator()(const Double_t* x, const Double_t* p=0) const;  // Needed for creating TF1

   Double_t GetValue(Double_t x) const { return (*this)(x); }
   Double_t GetError(Double_t x) const;

   Double_t GetBias(Double_t x) const;
   Double_t GetMean() const;
   Double_t GetSigma() const;
   Double_t GetRAMISE() const;

   Double_t GetFixedWeight() const;

   TF1* GetFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
   TF1* GetUpperFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
   TF1* GetLowerFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
   TF1* GetApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
   TGraphErrors * GetGraphWithErrors(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);

   // get the drawn object to chanage settings
   // These objects are managed by TKDE and should not be deleted by the user
   TF1 * GetDrawnFunction() { return fPDF;}  
   TF1 * GetDrawnUpperFunction() { return fUpperPDF;}  
   TF1 * GetDrawnLowerFunction() { return fLowerPDF;}  
   TGraphErrors * GetDrawnGraph() { return fGraph;}  

   const Double_t * GetAdaptiveWeights() const;


private:

   TKDE(TKDE& kde);           // Disallowed copy constructor
   TKDE operator=(TKDE& kde); // Disallowed assign operator

   typedef ROOT::Math::IBaseFunctionOneDim* KernelFunction_Ptr;
   KernelFunction_Ptr fKernelFunction;

   class TKernel;
   friend class TKernel;

   TKernel* fKernel;

   std::vector<Double_t> fData;   // Data events
   std::vector<Double_t> fEvents; // Original data storage

   TF1* fPDF;             // Output Kernel Density Estimation PDF function
   TF1* fUpperPDF;        // Output Kernel Density Estimation upper confidence interval PDF function
   TF1* fLowerPDF;        // Output Kernel Density Estimation lower confidence interval PDF function
   TF1* fApproximateBias; // Output Kernel Density Estimation approximate bias
   TGraphErrors* fGraph;  // Graph with the errors

   EKernelType fKernelType;
   EIteration fIteration;
   EMirror fMirror;
   EBinning fBinning;

   Bool_t fUseMirroring, fMirrorLeft, fMirrorRight, fAsymLeft, fAsymRight;
   Bool_t fUseBins;
   Bool_t fNewData;        // flag to control when new data are given
   Bool_t fUseMinMaxFromData; // flag top control if min and max must be used from data

   UInt_t fNBins;          // Number of bins for binned data option
   UInt_t fNEvents;        // Data's number of events
   UInt_t fUseBinsNEvents; // If the algorithm is allowed to use binning this is the minimum number of events to do so

   Double_t fMean;  // Data mean
   Double_t fSigma; // Data std deviation
   Double_t fSigmaRob; // Data std deviation (robust estimation)
   Double_t fXMin;  // Data minimum value
   Double_t fXMax;  // Data maximum value
   Double_t fRho;   // Adjustment factor for sigma
   Double_t fAdaptiveBandwidthFactor; // Geometric mean of the kernel density estimation from the data for adaptive iteration

   Double_t fWeightSize; // Caches the weight size

   std::vector<Double_t> fCanonicalBandwidths;
   std::vector<Double_t> fKernelSigmas2;

   std::vector<UInt_t> fBinCount; // Number of events per bin for binned data option

   std::vector<Bool_t> fSettedOptions; // User input options flag

   struct KernelIntegrand;
   friend struct KernelIntegrand;

   void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data,
                    Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho);

   inline Double_t GaussianKernel(Double_t x) const {
      // Returns the kernel evaluation at x
      Double_t k2_PI_ROOT_INV = 0.398942280401432703; // (2 * M_PI)**-0.5
      return (x > -9. && x < 9.) ? k2_PI_ROOT_INV * std::exp(-.5 * x * x) : 0.0;
   }
   inline Double_t EpanechnikovKernel(Double_t x) const {
      return (x > -1. &&  x < 1.) ? 3. / 4. * (1. - x * x) : 0.0;
   }
   inline Double_t BiweightKernel(Double_t x) const {
      // Returns the kernel evaluation at x
      return (x > -1. &&  x < 1.) ? 15. / 16. * (1. - x * x) * (1. - x * x) : 0.0;
   }
   inline Double_t CosineArchKernel(Double_t x) const {
      // Returns the kernel evaluation at x
      return (x > -1. &&  x < 1.) ? M_PI_4 * std::cos(M_PI_2 * x) : 0.0;
   }
   Double_t UpperConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
   Double_t LowerConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
   Double_t ApproximateBias(const Double_t* x, const Double_t* ) const { return GetBias(*x); }
   Double_t ComputeKernelL2Norm() const;
   Double_t ComputeKernelSigma2() const;
   Double_t ComputeKernelMu() const;
   Double_t ComputeKernelIntegral() const;
   Double_t ComputeMidspread() ;

   UInt_t Index(Double_t x) const;

   void SetBinCentreData(Double_t xmin, Double_t xmax);
   void SetBinCountData();
   void CheckKernelValidity();
   void SetCanonicalBandwidth();
   void SetKernelSigma2();
   void SetCanonicalBandwidths();
   void SetKernelSigmas2();
   void SetHistogram();
   void SetUseBins();
   void SetMirror();
   void SetMean();
   void SetSigma(Double_t R);
   void SetKernel();
   void SetKernelFunction(KernelFunction_Ptr kernfunc = 0);
   void SetOptions(const Option_t* option, Double_t rho);
   void CheckOptions(Bool_t isUserDefinedKernel = kFALSE);
   void GetOptions(std::string optionType, std::string option);
   void AssureOptions();
   void SetData(const Double_t* data);
   void InitFromNewData();
   void SetMirroredEvents();
   void SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt);
   void DrawErrors(TString& drawOpt);
   void DrawConfidenceInterval(TString& drawOpt, double cl=0.95);

   TF1* GetKDEFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
   TF1* GetKDEApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
   // The density to estimate should be at least twice differentiable.
   TF1* GetPDFUpperConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
   TF1* GetPDFLowerConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);

   ClassDef(TKDE, 1) // One dimensional semi-parametric Kernel Density Estimation

};

#endif
 TKDE.h:1
 TKDE.h:2
 TKDE.h:3
 TKDE.h:4
 TKDE.h:5
 TKDE.h:6
 TKDE.h:7
 TKDE.h:8
 TKDE.h:9
 TKDE.h:10
 TKDE.h:11
 TKDE.h:12
 TKDE.h:13
 TKDE.h:14
 TKDE.h:15
 TKDE.h:16
 TKDE.h:17
 TKDE.h:18
 TKDE.h:19
 TKDE.h:20
 TKDE.h:21
 TKDE.h:22
 TKDE.h:23
 TKDE.h:24
 TKDE.h:25
 TKDE.h:26
 TKDE.h:27
 TKDE.h:28
 TKDE.h:29
 TKDE.h:30
 TKDE.h:31
 TKDE.h:32
 TKDE.h:33
 TKDE.h:34
 TKDE.h:35
 TKDE.h:36
 TKDE.h:37
 TKDE.h:38
 TKDE.h:39
 TKDE.h:40
 TKDE.h:41
 TKDE.h:42
 TKDE.h:43
 TKDE.h:44
 TKDE.h:45
 TKDE.h:46
 TKDE.h:47
 TKDE.h:48
 TKDE.h:49
 TKDE.h:50
 TKDE.h:51
 TKDE.h:52
 TKDE.h:53
 TKDE.h:54
 TKDE.h:55
 TKDE.h:56
 TKDE.h:57
 TKDE.h:58
 TKDE.h:59
 TKDE.h:60
 TKDE.h:61
 TKDE.h:62
 TKDE.h:63
 TKDE.h:64
 TKDE.h:65
 TKDE.h:66
 TKDE.h:67
 TKDE.h:68
 TKDE.h:69
 TKDE.h:70
 TKDE.h:71
 TKDE.h:72
 TKDE.h:73
 TKDE.h:74
 TKDE.h:75
 TKDE.h:76
 TKDE.h:77
 TKDE.h:78
 TKDE.h:79
 TKDE.h:80
 TKDE.h:81
 TKDE.h:82
 TKDE.h:83
 TKDE.h:84
 TKDE.h:85
 TKDE.h:86
 TKDE.h:87
 TKDE.h:88
 TKDE.h:89
 TKDE.h:90
 TKDE.h:91
 TKDE.h:92
 TKDE.h:93
 TKDE.h:94
 TKDE.h:95
 TKDE.h:96
 TKDE.h:97
 TKDE.h:98
 TKDE.h:99
 TKDE.h:100
 TKDE.h:101
 TKDE.h:102
 TKDE.h:103
 TKDE.h:104
 TKDE.h:105
 TKDE.h:106
 TKDE.h:107
 TKDE.h:108
 TKDE.h:109
 TKDE.h:110
 TKDE.h:111
 TKDE.h:112
 TKDE.h:113
 TKDE.h:114
 TKDE.h:115
 TKDE.h:116
 TKDE.h:117
 TKDE.h:118
 TKDE.h:119
 TKDE.h:120
 TKDE.h:121
 TKDE.h:122
 TKDE.h:123
 TKDE.h:124
 TKDE.h:125
 TKDE.h:126
 TKDE.h:127
 TKDE.h:128
 TKDE.h:129
 TKDE.h:130
 TKDE.h:131
 TKDE.h:132
 TKDE.h:133
 TKDE.h:134
 TKDE.h:135
 TKDE.h:136
 TKDE.h:137
 TKDE.h:138
 TKDE.h:139
 TKDE.h:140
 TKDE.h:141
 TKDE.h:142
 TKDE.h:143
 TKDE.h:144
 TKDE.h:145
 TKDE.h:146
 TKDE.h:147
 TKDE.h:148
 TKDE.h:149
 TKDE.h:150
 TKDE.h:151
 TKDE.h:152
 TKDE.h:153
 TKDE.h:154
 TKDE.h:155
 TKDE.h:156
 TKDE.h:157
 TKDE.h:158
 TKDE.h:159
 TKDE.h:160
 TKDE.h:161
 TKDE.h:162
 TKDE.h:163
 TKDE.h:164
 TKDE.h:165
 TKDE.h:166
 TKDE.h:167
 TKDE.h:168
 TKDE.h:169
 TKDE.h:170
 TKDE.h:171
 TKDE.h:172
 TKDE.h:173
 TKDE.h:174
 TKDE.h:175
 TKDE.h:176
 TKDE.h:177
 TKDE.h:178
 TKDE.h:179
 TKDE.h:180
 TKDE.h:181
 TKDE.h:182
 TKDE.h:183
 TKDE.h:184
 TKDE.h:185
 TKDE.h:186
 TKDE.h:187
 TKDE.h:188
 TKDE.h:189
 TKDE.h:190
 TKDE.h:191
 TKDE.h:192
 TKDE.h:193
 TKDE.h:194
 TKDE.h:195
 TKDE.h:196
 TKDE.h:197
 TKDE.h:198
 TKDE.h:199
 TKDE.h:200
 TKDE.h:201
 TKDE.h:202
 TKDE.h:203
 TKDE.h:204
 TKDE.h:205
 TKDE.h:206
 TKDE.h:207
 TKDE.h:208
 TKDE.h:209
 TKDE.h:210
 TKDE.h:211
 TKDE.h:212
 TKDE.h:213
 TKDE.h:214
 TKDE.h:215
 TKDE.h:216
 TKDE.h:217
 TKDE.h:218
 TKDE.h:219
 TKDE.h:220
 TKDE.h:221
 TKDE.h:222
 TKDE.h:223
 TKDE.h:224
 TKDE.h:225
 TKDE.h:226
 TKDE.h:227
 TKDE.h:228
 TKDE.h:229
 TKDE.h:230
 TKDE.h:231
 TKDE.h:232
 TKDE.h:233
 TKDE.h:234
 TKDE.h:235
 TKDE.h:236
 TKDE.h:237
 TKDE.h:238
 TKDE.h:239
 TKDE.h:240
 TKDE.h:241
 TKDE.h:242
 TKDE.h:243