Logo ROOT  
Reference Guide
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Alexander Voigt
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Classes: PDEFoamDecisionTree *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation of decision tree like PDEFoam *
12 * *
13 * Authors (alphabetical): *
14 * Tancredi Carli - CERN, Switzerland *
15 * Dominik Dannheim - CERN, Switzerland *
16 * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17 * Alexander Voigt - TU Dresden, Germany *
18 * Peter Speckmayer - CERN, Switzerland *
19 * *
20 * Copyright (c) 2010: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29/*! \class TMVA::PDEFoamDecisionTree
30\ingroup TMVA
31
32This PDEFoam variant acts like a decision tree and stores in every
33cell the discriminant
34
35 D = #events with given class / total number of events
36
37as well as the statistical error on the discriminant. It therefore
38acts as a discriminant estimator. The decision tree-like behaviour
39is achieved by overriding PDEFoamDiscriminant::Explore() to use a
40decision tree-like cell splitting algorithm (given a separation
41type).
42
43This PDEFoam variant should be booked together with the
44PDEFoamDecisionTreeDensity density estimator, which returns the
45events in a cell without sampling.
46*/
47
50
51#include "TMVA/MsgLogger.h"
52#include "TMVA/PDEFoamCell.h"
54#include "TMVA/PDEFoamVect.h"
55#include "TMVA/SeparationBase.h"
56#include "TMVA/Types.h"
57#include "TMVA/Volume.h"
58
59#include "Rtypes.h"
60#include "TH1D.h"
61
62class TString;
63
65
66////////////////////////////////////////////////////////////////////////////////
67/// Default constructor for streamer, user should not use it.
68
71 , fSepType(NULL)
72{
73}
74
75////////////////////////////////////////////////////////////////////////////////
76/// Parameters:
77///
78/// - name - name of the foam
79///
80/// - sepType - separation type used for the cell splitting (will be
81/// deleted in the destructor)
82///
83/// - cls - class to consider as signal when calculating the purity
84
87 , fSepType(sepType)
88{
89}
90
91////////////////////////////////////////////////////////////////////////////////
92/// Copy Constructor NOT IMPLEMENTED (NEVER USED)
93
96 , fSepType(from.fSepType)
97{
98 Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
99}
100
101////////////////////////////////////////////////////////////////////////////////
102/// Destructor
103/// deletes fSepType
104
106{
107 if (fSepType)
108 delete fSepType;
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// Internal subprogram used by Create. It explores newly defined
113/// cell with according to the decision tree logic. The separation
114/// set via the 'sepType' option in the constructor.
115///
116/// The optimal division point for eventual future cell division is
117/// determined/recorded. Note that links to parents and initial
118/// volume = 1/2 parent has to be already defined prior to calling
119/// this routine.
120///
121/// Note, that according to the decision tree logic, a cell is only
122/// split, if the number of (unweighted) events in each daughter
123/// cell is greater than fNmin.
124
126{
127 if (!cell)
128 Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
129
130 // create edge histograms
131 std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
132 hsig.reserve(fDim);
133 hbkg.reserve(fDim);
134 hsig_unw.reserve(fDim);
135 hbkg_unw.reserve(fDim);
136 for (Int_t idim = 0; idim < fDim; idim++) {
137 hsig.push_back(new TH1D(Form("hsig_%i", idim),
138 Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
139 hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
140 Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
141 hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
142 Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
143 hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
144 Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
145 }
146
147 // get cell position and size
148 PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
149 cell->GetHcub(cellPosi, cellSize);
150
151 // determine lower and upper cell bound
152 std::vector<Double_t> lb(GetTotDim()); // lower bound
153 std::vector<Double_t> ub(GetTotDim()); // upper bound
154 for (Int_t idim = 0; idim < GetTotDim(); idim++) {
155 lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
156 ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
157 }
158
159 // fDistr must be of type PDEFoamDecisionTreeDensity*
160 PDEFoamDecisionTreeDensity *distr = dynamic_cast<PDEFoamDecisionTreeDensity*>(fDistr);
161 if (distr == NULL)
162 Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
163 << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
164
165 // create TMVA::Volume object needed for searching within the BST
166 TMVA::Volume volume(&lb, &ub);
167
168 // fill the signal and background histograms for the given volume
169 distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
170
171 // ------ determine the best division edge
172 Double_t xBest = 0.5; // best division point
173 Int_t kBest = -1; // best split dimension
174 Double_t maxGain = -1.0; // maximum gain
175 Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
176 Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
177 Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
178 Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
179
180 for (Int_t idim = 0; idim < fDim; ++idim) {
181 Double_t nSelS = hsig.at(idim)->GetBinContent(0);
182 Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
183 Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
184 Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
185 for (Int_t jLo = 1; jLo < fNBin; jLo++) {
186 nSelS += hsig.at(idim)->GetBinContent(jLo);
187 nSelB += hbkg.at(idim)->GetBinContent(jLo);
188 nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
189 nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
190
191 // proceed if total number of events in left and right cell
192 // is greater than fNmin
193 if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
194 (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
195 continue;
196
197 Double_t xLo = 1.0 * jLo / fNBin;
198
199 // calculate separation gain
200 Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
201
202 if (gain >= maxGain) {
203 maxGain = gain;
204 xBest = xLo;
205 kBest = idim;
206 }
207 } // jLo
208 } // idim
209
210 if (kBest >= fDim || kBest < 0) {
211 // No best division edge found! One must ensure, that this cell
212 // is not chosen for splitting in PeekMax(). But since in
213 // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
214 // should set maxGain to -1.0 (or even 0.0?) here.
215 maxGain = -1.0;
216 }
217
218 // set cell properties
219 cell->SetBest(kBest);
220 cell->SetXdiv(xBest);
221 if (nTotB + nTotS > 0)
222 cell->SetIntg(nTotS / (nTotB + nTotS));
223 else
224 cell->SetIntg(0.0);
225 cell->SetDriv(maxGain);
226 cell->CalcVolume();
227
228 // set cell element 0 (total number of events in cell) during
229 // build-up
230 if (GetNmin() > 0)
231 SetCellElement(cell, 0, nTotS + nTotB);
232
233 // clean up
234 for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
235 for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
236 for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
237 for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
238}
double Double_t
Definition: RtypesCore.h:57
#define ClassImp(name)
Definition: Rtypes.h:361
char name[80]
Definition: TGX11.cxx:109
char * Form(const char *fmt,...)
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:614
void SetXdiv(Double_t Xdiv)
Definition: PDEFoamCell.h:80
void CalcVolume()
Calculates volume of the cell using size params which are calculated.
void SetDriv(Double_t Driv)
Definition: PDEFoamCell.h:89
void SetBest(Int_t Best)
Definition: PDEFoamCell.h:79
void GetHcub(PDEFoamVect &, PDEFoamVect &) const
Provides size and position of the cell These parameter are calculated by analyzing information in all...
void SetIntg(Double_t Intg)
Definition: PDEFoamCell.h:88
This is a concrete implementation of PDEFoam.
virtual void FillHistograms(TMVA::Volume &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &)
Fill the given histograms with signal and background events, which are found in the volume.
This PDEFoam variant acts like a decision tree and stores in every cell the discriminant.
virtual void Explore(PDEFoamCell *Cell)
Internal subprogram used by Create.
PDEFoamDecisionTree()
Default constructor for streamer, user should not use it.
virtual ~PDEFoamDecisionTree()
Destructor deletes fSepType.
This PDEFoam variant stores in every cell the discriminant.
MsgLogger & Log() const
Definition: PDEFoam.h:238
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Volume for BinarySearchTree.
Definition: Volume.h:48
Basic string class.
Definition: TString.h:131
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:750
REAL epsilon
Definition: triangle.c:617