Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
KDEKernel.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Asen Christov
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6 * Package: TMVA *
7 * Class : TMVA::KDEKernel *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * MPI-K Heidelberg, Germany *
19 * Freiburg U., Germany *
20 * *
21 * Redistribution and use in source and binary forms, with or without *
22 * modification, are permitted according to the terms listed in LICENSE *
23 * (see tmva/doc/LICENSE) *
24 **********************************************************************************/
25
26/*! \class TMVA::KDEKernel
27\ingroup TMVA
28
29KDE Kernel for "smoothing" the PDFs.
30
31*/
32
33#include "TH1.h"
34#include "TH1F.h"
35#include "TF1.h"
36
37#include "TMath.h"
38
39#include "TMVA/KDEKernel.h"
40#include "TMVA/MsgLogger.h"
41#include "TMVA/Types.h"
42
43
44////////////////////////////////////////////////////////////////////////////////
45/// constructor
46/// sanity check
47
50: fSigma( 1. ),
51 fIter ( kiter ),
52 fLowerEdge (lower_edge ),
53 fUpperEdge (upper_edge),
54 fFineFactor ( FineFactor ),
55 fKernel_integ ( 0 ),
56 fKDEborder ( kborder ),
57 fLogger( new MsgLogger("KDEKernel") )
58{
59 if (hist == NULL) {
60 Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
61 }
62
63 fHist = (TH1F*)hist->Clone();
64 fFirstIterHist = (TH1F*)hist->Clone();
65 fFirstIterHist->Reset(); // now it is empty but with the proper binning
66 fSigmaHist = (TH1F*)hist->Clone();
67 fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
68
69 fHiddenIteration=false;
70}
71
72////////////////////////////////////////////////////////////////////////////////
73/// destructor
74
76{
77 if (fHist != NULL) delete fHist;
78 if (fFirstIterHist != NULL) delete fFirstIterHist;
79 if (fSigmaHist != NULL) delete fSigmaHist;
80 if (fKernel_integ != NULL) delete fKernel_integ;
81 delete fLogger;
82}
83
84////////////////////////////////////////////////////////////////////////////////
85/// when using Gaussian as Kernel function this is faster way to calculate the integrals
86
88{
89 if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
90
91 Float_t xs1=(x[0]-par[0])/par[1];
92 Float_t xs2=(x[1]-par[0])/par[1];
93
94 if (xs1==0) {
95 if (xs2==0) return 0.;
96 if (xs2>0 ) return 0.5*TMath::Erf(xs2);
97 }
98 if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
99 if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
100 if (xs1<0) {
101 if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
102 else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
103 }
104 return -1.;
105}
106
107////////////////////////////////////////////////////////////////////////////////
108/// fIter == 1 ---> nonadaptive KDE
109/// fIter == 2 ---> adaptive KDE
110
112{
113 if (ktype == kGauss) {
114
115 // i.e. gauss kernel
116 //
117 // this is going to be done for both (nonadaptive KDE and adaptive KDE)
118 // for nonadaptive KDE this is the only = final thing to do
119 // for adaptive KDE this is going to be used in the first (hidden) iteration
120 fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);
121 fSigma = ( TMath::Sqrt(2.0)
122 *TMath::Power(4./3., 0.2)
123 *fHist->GetRMS()
124 *TMath::Power(fHist->Integral(), -0.2) );
125 // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
126 // formula found in:
127 // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
128 if (fSigma <= 0 ) {
129 Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
130 }
131 }
132
133 if (fIter == kAdaptiveKDE) {
134
135 // this is done only for adaptive KDE
136
137 // fill a temporary histo using nonadaptive KDE
138 // this histo is identical with the final output when using only nonadaptive KDE
139 fHiddenIteration=true;
140
141 Float_t histoLowEdge=fHist->GetBinLowEdge(1);
142 Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
143
144 for (Int_t i=1;i<fHist->GetNbinsX();i++) {
145 // loop over the bins of the original histo
146 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
147 // loop over the bins of the PDF histo and fill it
148 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
149 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
150 fFirstIterHist->GetBinLowEdge(j+1),
151 fHist->GetBinCenter(i),
152 i)
153 );
154 }
155 if (fKDEborder == 3) { // mirror the samples and fill them again
156 // in order to save time do the mirroring only for the first (the lower) 1/5 of the histo to the left;
157 // and the last (the higher) 1/5 of the histo to the right.
158 // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
159 if (i < fHist->GetNbinsX()/5 ) { // the first (the lower) 1/5 of the histo
160 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
161 // loop over the bins of the PDF histo and fill it
162 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
163 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
164 fFirstIterHist->GetBinLowEdge(j+1),
165 2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
166 i)
167 );
168 }
169 }
170 if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
171 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
172 // loop over the bins of the PDF histo and fill it
173 fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
174 this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
175 fFirstIterHist->GetBinLowEdge(j+1),
176 2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
177 i)
178 );
179 }
180 }
181 }
182 }
183
184 fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
185
186 // do "function like" integration = sum of (bin_width*bin_content):
187 Float_t integ=0;
188 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++)
189 integ+=fFirstIterHist->GetBinContent(j)*fFirstIterHist->GetBinWidth(j);
190 fFirstIterHist->Scale(1./integ);
191
192 fHiddenIteration=false;
193
194 // OK, now we have the first iteration,
195 // next: calculate the Sigmas (Widths) for the second (adaptive) iteration
196 // based on the output of the first iteration
197 // these Sigmas will be stored in histo called fSigmaHist
198 for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
199 // loop over the bins of the PDF histo and fill fSigmaHist
200 if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
201 Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
202 }
203
204 fSigmaHist->SetBinContent(j,fFineFactor*fSigma/TMath::Sqrt(fFirstIterHist->GetBinContent(j)));
205 }
206 }
207
208 if (fKernel_integ ==0 ) {
209 Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
210 }
211}
212
213////////////////////////////////////////////////////////////////////////////////
214/// calculates the integral of the Kernel
215
217{
218 if ((fIter == kNonadaptiveKDE) || fHiddenIteration )
219 fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
220 else if ((fIter == kAdaptiveKDE) && !fHiddenIteration )
221 fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
222
223 if ( fKDEborder == 2 ) { // renormalization of the kernel function
224 Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
225 return (renormFactor*fKernel_integ->Eval(lowr,highr));
226 }
227
228 // the RenormFactor takes care about the "border" effects, i.e. sets the
229 // integral to one inside the histogram borders
230 return (fKernel_integ->Eval(lowr,highr));
231}
232
Double_t GaussIntegral(Double_t *x, Double_t *par)
when using Gaussian as Kernel function this is faster way to calculate the integrals
Definition KDEKernel.cxx:87
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
1-Dim function class
Definition TF1.h:182
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:879
void Reset(Option_t *option="") override
Reset.
Definition TH1.cxx:10324
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
TObject * Clone(const char *newname="") const override
Make a complete copy of the underlying object.
Definition TH1.cxx:2734
TH1F * fHist
copy of input histogram
Definition KDEKernel.h:82
virtual ~KDEKernel(void)
destructor
Definition KDEKernel.cxx:75
TH1F * fSigmaHist
contains the Sigmas Widths for adaptive KDE
Definition KDEKernel.h:84
TH1F * fFirstIterHist
histogram to be filled in the hidden iteration
Definition KDEKernel.h:83
void SetKernelType(EKernelType ktype=kGauss)
fIter == 1 ---> nonadaptive KDE fIter == 2 ---> adaptive KDE
KDEKernel(EKernelIter kiter=kNonadaptiveKDE, const TH1 *hist=nullptr, Float_t lower_edge=0., Float_t upper_edge=1., EKernelBorder kborder=kNoTreatment, Float_t FineFactor=1.)
constructor sanity check
Definition KDEKernel.cxx:48
MsgLogger & Log() const
Definition KDEKernel.h:89
Bool_t fHiddenIteration
Defines if whats currently running is the.
Definition KDEKernel.h:85
Float_t GetBinKernelIntegral(Float_t lowr, Float_t highr, Float_t mean, Int_t binnum)
calculates the integral of the Kernel
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
Double_t x[n]
Definition legend1.C:17
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Double_t Erf(Double_t x)
Computation of the error function erf(x).
Definition TMath.cxx:190
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:673
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Returns x raised to the power y.
Definition TMath.h:732
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:124