Logo ROOT  
Reference Guide
SeparationBase.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : SeparationBase *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: An interface to different separation criteria used in various *
11 * training algorithms, as there are: *
12 * *
13 * There are two things: the Separation Index, and the Separation Gain *
14 * Separation Index: *
15 * Measure of the "purity" of a sample. If all elements (events) in the *
16 * sample belong to the same class (e.g. signal or background), then the *
17 * separation index is 0 (meaning 100% purity, or 0% purity, as it is *
18 * symmetric). The index becomes maximal for perfectly mixed samples, *
19 * e.g. purity = 50%, N_signal = N_bkg *
20 * *
21 * Separation Gain: *
22 * the measure of how the quality of separation of the sample increases *
23 * by splitting the sample e.g. into a "left-node" and a "right-node" *
24 * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) *
25 * This is then the quality criterion that is optimized when trying *
26 * to increase the information in the system (making the best selection). *
27 * *
28 * Authors (alphabetical): *
29 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
30 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
31 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
32 * *
33 * Copyright (c) 2005: *
34 * CERN, Switzerland *
35 * U. of Victoria, Canada *
36 * Heidelberg U., Germany *
37 * *
38 * Redistribution and use in source and binary forms, with or without *
39 * modification, are permitted according to the terms listed in LICENSE *
40 * (http://tmva.sourceforge.net/LICENSE) *
41 **********************************************************************************/
42
43/*! \class TMVA::SeparationBase
44\ingroup TMVA
45An interface to calculate the "SeparationGain" for different
46separation criteria used in various training algorithms
47
48There are two things: the Separation Index, and the Separation Gain
49Separation Index:
50Measure of the "purity" of a sample. If all elements (events) in the
51sample belong to the same class (e.g. signal or background), then the
52separation index is 0 (meaning 100% purity, or 0% purity, as it is
53symmetric). The index becomes maximal for perfectly mixed samples,
54e.g. purity = 50%, N_signal = N_bkg
55
56Separation Gain:
57the measure of how the quality of separation of the sample increases
58by splitting the sample e.g. into a "left-node" and a "right-node"
59(N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
60This is then the quality criterion that is optimized when trying
61to increase the information in the system (making the best selection).
62*/
63#include "TMVA/SeparationBase.h"
64
65#include "TMath.h"
66#include "TString.h"
67
68#include <iostream>
69#include <limits>
70
72
73////////////////////////////////////////////////////////////////////////////////
74/// Constructor.
75
77fName(""),
78 fPrecisionCut(TMath::Sqrt(std::numeric_limits<double>::epsilon())) // threshold used by GetSeparationGain: square root of the double machine epsilon
79{
80 // Default constructor: the name starts out empty and the body has nothing left to do.
81}
82
83////////////////////////////////////////////////////////////////////////////////
84/// Copy constructor.
85
87 fName(s.fName),
88 fPrecisionCut(TMath::Sqrt(std::numeric_limits<double>::epsilon())) // recomputed with the same expression as the default constructor, not read from s
89{
90 // Copy constructor: only the name is taken over from s; the body is empty.
91}
92
93////////////////////////////////////////////////////////////////////////////////
94/// Separation Gain:
95/// the measure of how the quality of separation of the sample increases
96/// by splitting the sample e.g. into a "left-node" and a "right-node"
97/// (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
98/// This is then the quality criterion that is optimized when trying
99/// to increase the information in the system (making the best selection).
100
102 const Double_t nTotS, const Double_t nTotB)
103{
104 if ( (nTotS-nSelS)==nSelS && (nTotB-nSelB)==nSelB) return 0.; // selected and unselected parts hold identical signal and background counts: return 0 without evaluating any index (NOTE(review): exact floating-point equality - confirm this is intended for weighted counts)
105 // Disabled variant below: each index is weighted by the absolute event count of its sample.
106 // Double_t parentIndex = (nTotS+nTotB) *this->GetSeparationIndex(nTotS,nTotB);
107
108 // Double_t leftIndex = ( ((nTotS - nSelS) + (nTotB - nSelB))
109 // * this->GetSeparationIndex(nTotS-nSelS,nTotB-nSelB) );
110 // Double_t rightIndex = (nSelS+nSelB) * this->GetSeparationIndex(nSelS,nSelB);
111 // Active variant: the parent index is used unweighted and each daughter index is weighted by its
112 // fraction of the parent sample. NOTE(review): the division by (nTotS+nTotB) is unguarded - confirm callers never pass an empty parent sample.
113 Double_t parentIndex = this->GetSeparationIndex(nTotS,nTotB);
114 // "left" is built from the events that were NOT selected (total minus selected), "right" from the selected ones.
115 Double_t leftIndex = ( ((nTotS - nSelS) + (nTotB - nSelB))/(nTotS+nTotB)
116 * this->GetSeparationIndex(nTotS-nSelS,nTotB-nSelB) );
117 Double_t rightIndex = (nSelS+nSelB)/(nTotS+nTotB) * this->GetSeparationIndex(nSelS,nSelB);
118 // Gain = parent index minus the fraction-weighted daughter indices.
119 Double_t diff = parentIndex - leftIndex - rightIndex;
120 //Double_t diff = (parentIndex - leftIndex - rightIndex)/(nTotS+nTotB);
121 // Every value below fPrecisionCut - which includes all negative values - is reported as exactly 0.
122 if(diff<fPrecisionCut ) {
123 // std::cout << " Warning value in GetSeparation is below numerical precision "
124 // << diff/parentIndex
125 // << std::endl;
126 return 0;
127 }
128 // Otherwise hand back the computed gain unchanged.
129 return diff;
130}
131
132
double Double_t
Definition: RtypesCore.h:55
#define ClassImp(name)
Definition: Rtypes.h:365
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
SeparationBase()
Constructor.
Double_t Sqrt(Double_t x)
static constexpr double s
TMath.
Definition: TMathBase.h:35
REAL epsilon
Definition: triangle.c:617