Logo ROOT   6.10/09
Reference Guide
SeparationBase.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : SeparationBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: An interface to different separation criteria used in various *
11  * training algorithms, as there are: *
12  * *
13  * There are two things: the Separation Index, and the Separation Gain *
14  * Separation Index: *
15  * Measure of the "purity" of a sample. If all elements (events) in the *
16  * sample belong to the same class (e.g. signal or backgr), then the *
17  * separation index is 0 (meaning 100% purity, or 0% purity, as it is *
18  * symmetric). The index becomes maximal for perfectly mixed samples, *
19  * e.g. purity=50%, N_signal = N_bkg *
20  * *
21  * Separation Gain: *
22  * the measure of how the quality of separation of the sample increases *
23  * by splitting the sample e.g. into a "left-node" and a "right-node" *
24  * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) *
25  * this is then the quality criterion which is optimized for when trying *
26  * to increase the information in the system (making the best selection) *
27  * *
28  * Authors (alphabetical): *
29  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
30  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
31  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
32  * *
33  * Copyright (c) 2005: *
34  * CERN, Switzerland *
35  * U. of Victoria, Canada *
36  * Heidelberg U., Germany *
37  * *
38  * Redistribution and use in source and binary forms, with or without *
39  * modification, are permitted according to the terms listed in LICENSE *
40  * (http://tmva.sourceforge.net/LICENSE) *
41  **********************************************************************************/
42 
43 /*! \class TMVA::SeparationBase
44 \ingroup TMVA
45 An interface to calculate the "SeparationGain" for different
46 separation criteria used in various training algorithms
47 
48 There are two things: the Separation Index, and the Separation Gain
49 Separation Index:
50 Measure of the "purity" of a sample. If all elements (events) in the
51 sample belong to the same class (e.g. signal or background), then the
52 separation index is 0 (meaning 100% purity, or 0% purity, as it is
53 symmetric). The index becomes maximal for perfectly mixed samples,
54 e.g. purity=50%, N_signal = N_bkg
55 
56 Separation Gain:
57 the measure of how the quality of separation of the sample increases
58 by splitting the sample e.g. into a "left-node" and a "right-node"
59 (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
60 this is then the quality criterion which is optimized for when trying
61 to increase the information in the system (making the best selection)
62 */
63 #include "TMVA/SeparationBase.h"
64 
65 #include "TMath.h"
66 #include "TString.h"
67 
68 #include <iostream>
69 #include <limits>
70 
72 
73 ////////////////////////////////////////////////////////////////////////////////
74 /// Constructor.
75 
77 fName(""),
78  fPrecisionCut(TMath::Sqrt(std::numeric_limits<double>::epsilon()))
79 {
80  // Default constructor: starts with an empty name and sets the precision cut to the square root of double's machine epsilon.
81 }
82 
83 ////////////////////////////////////////////////////////////////////////////////
84 /// Copy constructor.
85 
87  fName(s.fName),
88  fPrecisionCut(TMath::Sqrt(std::numeric_limits<double>::epsilon()))
89 {
90  // Copy constructor: takes fName from s; fPrecisionCut is recomputed from double's machine epsilon rather than read from s.
91 }
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// Separation Gain:
95 /// the measure of how the quality of separation of the sample increases
96 /// by splitting the sample e.g. into a "left-node" and a "right-node"
97 /// (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
98 /// this is then the quality criterion which is optimized for when trying
99 /// to increase the information in the system (making the best selection)
100 
102  const Double_t nTotS, const Double_t nTotB)
103 {
104  if ( (nTotS-nSelS)==nSelS && (nTotB-nSelB)==nSelB) return 0.; // split puts exactly half of S and half of B on each side: gain is 0
105  // Disabled variant below: indices weighted by absolute event counts instead of fractions.
106  // Double_t parentIndex = (nTotS+nTotB) *this->GetSeparationIndex(nTotS,nTotB);
107 
108  // Double_t leftIndex = ( ((nTotS - nSelS) + (nTotB - nSelB))
109  // * this->GetSeparationIndex(nTotS-nSelS,nTotB-nSelB) );
110  // Double_t rightIndex = (nSelS+nSelB) * this->GetSeparationIndex(nSelS,nSelB);
111 
112  // Active variant: each child index is weighted by that child's fraction of the parent's (nTotS+nTotB) events.
113  Double_t parentIndex = this->GetSeparationIndex(nTotS,nTotB);
114  // "left" is the non-selected remainder (nTot - nSel); "right" is the selected part (nSel).
115  Double_t leftIndex = ( ((nTotS - nSelS) + (nTotB - nSelB))/(nTotS+nTotB)
116  * this->GetSeparationIndex(nTotS-nSelS,nTotB-nSelB) );
117  Double_t rightIndex = (nSelS+nSelB)/(nTotS+nTotB) * this->GetSeparationIndex(nSelS,nSelB);
118 
119  Double_t diff = parentIndex - leftIndex - rightIndex;
120  //Double_t diff = (parentIndex - leftIndex - rightIndex)/(nTotS+nTotB);
121  // Any gain below fPrecisionCut (negative values included) is returned as exactly 0.
122  if(diff<fPrecisionCut ) {
123  // std::cout << " Warning value in GetSeparation is below numerical precision "
124  // << diff/parentIndex
125  // << std::endl;
126  return 0;
127  }
128 
129  return diff;
130 }
131 
132 
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
SeparationBase()
Constructor.
STL namespace.
Double_t Sqrt(Double_t x)
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
REAL epsilon
Definition: triangle.c:617
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
#define ClassImp(name)
Definition: Rtypes.h:336
double Double_t
Definition: RtypesCore.h:55
Abstract ClassifierFactory template that handles arbitrary types.