Logo ROOT  
Reference Guide
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Classes: PDEFoamDecisionTree *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation of decision tree like PDEFoam *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17  * Alexander Voigt - TU Dresden, Germany *
18  * Peter Speckmayer - CERN, Switzerland *
19  * *
20  * Copyright (c) 2010: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 /*! \class TMVA::PDEFoamDecisionTree
30 \ingroup TMVA
31 
32 This PDEFoam variant acts like a decision tree and stores in every
33 cell the discriminant
34 
35  D = #events with given class / total number of events
36 
37 as well as the statistical error on the discriminant. It therefore
38 acts as a discriminant estimator. The decision tree-like behaviour
39 is achieved by overriding PDEFoamDiscriminant::Explore() to use a
40 decision tree-like cell splitting algorithm (given a separation
41 type).
42 
43 This PDEFoam variant should be booked together with the
44 PDEFoamDecisionTreeDensity density estimator, which returns the
45 events in a cell without sampling.
46 */
47 
50 
51 #include "TMVA/MsgLogger.h"
52 #include "TMVA/PDEFoamCell.h"
54 #include "TMVA/PDEFoamVect.h"
55 #include "TMVA/SeparationBase.h"
56 #include "TMVA/Types.h"
57 #include "TMVA/Volume.h"
58 
59 #include "Rtypes.h"
60 #include "TH1D.h"
61 
62 class TString;
63 
65 
66 ////////////////////////////////////////////////////////////////////////////////
67 /// Default constructor for streamer, user should not use it.
68 
71  , fSepType(NULL)
72 {
73 }
74 
75 ////////////////////////////////////////////////////////////////////////////////
76 /// Parameters:
77 ///
78 /// - name - name of the foam
79 ///
80 /// - sepType - separation type used for the cell splitting (will be
81 /// deleted in the destructor)
82 ///
83 /// - cls - class to consider as signal when calculating the purity
84 
87  , fSepType(sepType)
88 {
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// Copy Constructor NOT IMPLEMENTED (NEVER USED)
93 
95  : PDEFoamDiscriminant(from)
96  , fSepType(from.fSepType)
97 {
98  Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
99 }
100 
101 ////////////////////////////////////////////////////////////////////////////////
102 /// Destructor
103 /// deletes fSepType
104 
106 {
107  if (fSepType)
108  delete fSepType;
109 }
110 
111 ////////////////////////////////////////////////////////////////////////////////
112 /// Internal subprogram used by Create. It explores newly defined
113 /// cell with according to the decision tree logic. The separation
114 /// set via the 'sepType' option in the constructor.
115 ///
116 /// The optimal division point for eventual future cell division is
117 /// determined/recorded. Note that links to parents and initial
118 /// volume = 1/2 parent has to be already defined prior to calling
119 /// this routine.
120 ///
121 /// Note, that according to the decision tree logic, a cell is only
122 /// split, if the number of (unweighted) events in each daughter
123 /// cell is greater than fNmin.
124 
126 {
127  if (!cell)
128  Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
129 
130  // create edge histograms
131  std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
132  hsig.reserve(fDim);
133  hbkg.reserve(fDim);
134  hsig_unw.reserve(fDim);
135  hbkg_unw.reserve(fDim);
136  for (Int_t idim = 0; idim < fDim; idim++) {
137  hsig.push_back(new TH1D(Form("hsig_%i", idim),
138  Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
139  hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
140  Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
141  hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
142  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
143  hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
144  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
145  }
146 
147  // get cell position and size
148  PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
149  cell->GetHcub(cellPosi, cellSize);
150 
151  // determine lower and upper cell bound
152  std::vector<Double_t> lb(GetTotDim()); // lower bound
153  std::vector<Double_t> ub(GetTotDim()); // upper bound
154  for (Int_t idim = 0; idim < GetTotDim(); idim++) {
155  lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
156  ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
157  }
158 
159  // fDistr must be of type PDEFoamDecisionTreeDensity*
160  PDEFoamDecisionTreeDensity *distr = dynamic_cast<PDEFoamDecisionTreeDensity*>(fDistr);
161  if (distr == NULL)
162  Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
163  << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
164 
165  // create TMVA::Volume object needed for searching within the BST
166  TMVA::Volume volume(&lb, &ub);
167 
168  // fill the signal and background histograms for the given volume
169  distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
170 
171  // ------ determine the best division edge
172  Double_t xBest = 0.5; // best division point
173  Int_t kBest = -1; // best split dimension
174  Double_t maxGain = -1.0; // maximum gain
175  Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
176  Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
177  Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
178  Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
179 
180  for (Int_t idim = 0; idim < fDim; ++idim) {
181  Double_t nSelS = hsig.at(idim)->GetBinContent(0);
182  Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
183  Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
184  Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
185  for (Int_t jLo = 1; jLo < fNBin; jLo++) {
186  nSelS += hsig.at(idim)->GetBinContent(jLo);
187  nSelB += hbkg.at(idim)->GetBinContent(jLo);
188  nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
189  nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
190 
191  // proceed if total number of events in left and right cell
192  // is greater than fNmin
193  if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
194  (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
195  continue;
196 
197  Double_t xLo = 1.0 * jLo / fNBin;
198 
199  // calculate separation gain
200  Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
201 
202  if (gain >= maxGain) {
203  maxGain = gain;
204  xBest = xLo;
205  kBest = idim;
206  }
207  } // jLo
208  } // idim
209 
210  if (kBest >= fDim || kBest < 0) {
211  // No best division edge found! One must ensure, that this cell
212  // is not chosen for splitting in PeekMax(). But since in
213  // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
214  // should set maxGain to -1.0 (or even 0.0?) here.
215  maxGain = -1.0;
216  }
217 
218  // set cell properties
219  cell->SetBest(kBest);
220  cell->SetXdiv(xBest);
221  if (nTotB + nTotS > 0)
222  cell->SetIntg(nTotS / (nTotB + nTotS));
223  else
224  cell->SetIntg(0.0);
225  cell->SetDriv(maxGain);
226  cell->CalcVolume();
227 
228  // set cell element 0 (total number of events in cell) during
229  // build-up
230  if (GetNmin() > 0)
231  SetCellElement(cell, 0, nTotS + nTotB);
232 
233  // clean up
234  for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
235  for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
236  for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
237  for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
238 }
TMVA::PDEFoamCell::SetBest
void SetBest(Int_t Best)
Definition: PDEFoamCell.h:79
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
Form
char * Form(const char *fmt,...)
TMVA::Volume
Volume for BinarySearchTree.
Definition: Volume.h:47
PDEFoamCell.h
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TH1D
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:618
PDEFoamDensityBase.h
TMVA::PDEFoamCell::SetXdiv
void SetXdiv(Double_t Xdiv)
Definition: PDEFoamCell.h:80
TMVA::PDEFoamCell::SetDriv
void SetDriv(Double_t Driv)
Definition: PDEFoamCell.h:89
TString
Basic string class.
Definition: TString.h:136
PDEFoamVect.h
Volume.h
PDEFoamDecisionTree.h
TMVA::PDEFoamCell::GetHcub
void GetHcub(PDEFoamVect &, PDEFoamVect &) const
Provides size and position of the cell These parameter are calculated by analyzing information in all...
Definition: PDEFoamCell.cxx:140
SeparationBase.h
MsgLogger.h
epsilon
REAL epsilon
Definition: triangle.c:617
TMVA::PDEFoamDecisionTree::Explore
virtual void Explore(PDEFoamCell *Cell)
Internal subprogram used by Create.
Definition: PDEFoamDecisionTree.cxx:125
TMVA::PDEFoamVect
Definition: PDEFoamVect.h:36
Types.h
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
unsigned int
TMVA::PDEFoamDecisionTree
This PDEFoam variant acts like a decision tree and stores in every cell the discriminant.
Definition: PDEFoamDecisionTree.h:40
TMVA::PDEFoamCell::SetIntg
void SetIntg(Double_t Intg)
Definition: PDEFoamCell.h:88
TMVA::PDEFoam::Log
MsgLogger & Log() const
Definition: PDEFoam.h:240
TMVA::SeparationBase
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Definition: SeparationBase.h:82
TMVA::PDEFoamCell::CalcVolume
void CalcVolume()
Calculates volume of the cell using size params which are calculated.
Definition: PDEFoamCell.cxx:194
Double_t
double Double_t
Definition: RtypesCore.h:59
TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree
PDEFoamDecisionTree()
Default constructor for streamer, user should not use it.
Definition: PDEFoamDecisionTree.cxx:69
name
char name[80]
Definition: TGX11.cxx:110
TMVA::PDEFoamDiscriminant
This PDEFoam variant stores in every cell the discriminant.
Definition: PDEFoamDiscriminant.h:39
TH1D.h
TMVA::PDEFoamDecisionTree::~PDEFoamDecisionTree
virtual ~PDEFoamDecisionTree()
Destructor deletes fSepType.
Definition: PDEFoamDecisionTree.cxx:105
PDEFoamDecisionTreeDensity.h
TMVA::PDEFoamDecisionTreeDensity
This is a concrete implementation of PDEFoam.
Definition: PDEFoamDecisionTreeDensity.h:47
Rtypes.h
TMVA::PDEFoamCell
Definition: PDEFoamCell.h:41
TMVA::PDEFoamDecisionTreeDensity::FillHistograms
virtual void FillHistograms(TMVA::Volume &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &, std::vector< TH1D * > &)
Fill the given histograms with signal and background events, which are found in the volume.
Definition: PDEFoamDecisionTreeDensity.cxx:111
int