 ROOT   Reference Guide RegressionVariance.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
3
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RegressionVariance *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: Calculate the separation criteria used in regression *
11  * *
12  * There are two things: the Separation Index, and the Separation Gain *
13  * Separation Index: *
14  * Measure of the "Variance" of a sample. *
15  * *
16  * Separation Gain: *
17  * the measure of how the quality of separation of the sample increases *
18  * by splitting the sample e.g. into a "left-node" and a "right-node" *
19  * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) *
20  * this is then the quality criterion which is optimized when trying *
21  * to increase the information in the system (making the best selection) *
22  * *
23  * *
24  * Authors (alphabetical): *
25  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
26  * *
27  * Copyright (c) 2005: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * Heidelberg U., Germany *
31  * *
32  * Redistribution and use in source and binary forms, with or without *
33  * modification, are permitted according to the terms listed in LICENSE *
35  **********************************************************************************/
36 #include "TMath.h"
38
40
41 /*! \class TMVA::RegressionVariance
42 \ingroup TMVA
43 Calculate the "SeparationGain" for Regression analysis
44 separation criteria used in various training algorithms
45
46 There are two things: the Separation Index, and the Separation Gain
47 Separation Index:
48 Measure of the "Variance" of a sample.
49
50 Separation Gain:
51 the measure of how the quality of separation of the sample increases
52 by splitting the sample e.g. into a "left-node" and a "right-node"
53 (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
54 this is then the quality criterion which is optimized when trying
55 to increase the information in the system (making the best selection)
56 */
57
58 ////////////////////////////////////////////////////////////////////////////////
59 /// Separation Gain:
60 /// the measure of how the quality of separation of the sample increases
61 /// by splitting the sample e.g. into a "left-node" and a "right-node"
62 /// (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
63 /// this is then the quality criterion which is optimized when trying
64 /// to increase the information in the system.
65 /// For the regression: as the "gain is maximised", the RMS (sqrt(variance))
66 /// which is used as a "separation" index should be as small as possible;
67 /// the "figure of merit" here has to be -(rms_left + rms_right) or 1/rms...
68
70  const Double_t targetLeft, const Double_t target2Left,
71  const Double_t nTot,
72  const Double_t targetTot, const Double_t target2Tot)
73 {
 // Relative separation gain of a split: (parent - daughter - daughter) / parent, where
 // each term is an event count multiplied by GetSeparationIndex() of that sub-sample.
 // NOTE(review): the first line of this definition (return type, qualified name and the
 // nLeft parameter) is not present in this listing.
 // NOTE(review): targetX / target2X are presumably the summed targets and summed squared
 // targets of the respective sample -- confirm against the caller.
74
 // If either daughter would be empty there is nothing to gain; this early return also keeps
 // GetSeparationIndex() from being called with n == 0 for a daughter.
75  if ( nTot==nLeft || nLeft==0 ) return 0.;
76
77  Double_t parentIndex = nTot * this->GetSeparationIndex(nTot,targetTot,target2Tot);
 // NOTE(review): the two names below are the opposite of what they hold: "leftIndex" is
 // computed from the complement sample (nTot - nLeft, totals minus the left sums) and
 // "rightIndex" from the nLeft sample. Both are subtracted, so the result is unaffected.
78  Double_t leftIndex = ( (nTot - nLeft) * this->GetSeparationIndex(nTot-nLeft,targetTot-targetLeft,target2Tot-target2Left) );
79  Double_t rightIndex = nLeft * this->GetSeparationIndex(nLeft,targetLeft,target2Left);
80
 // Earlier alternative figure of merit, kept for reference.
81  // return 1/ (leftIndex + rightIndex);
 // NOTE(review): parentIndex is not checked against zero; if the parent term evaluates to 0
 // this division produces inf or NaN under IEEE-754 arithmetic.
82  return (parentIndex - leftIndex - rightIndex)/(parentIndex);
83 }
84
85 ////////////////////////////////////////////////////////////////////////////////
86 /// Separation Index: a simple Variance
87
89  const Double_t target, const Double_t target2)
90 {
 // Returns (target2 - target*target/n) / n.
 // NOTE(review): this is the sample variance if target is the sum of the target values and
 // target2 the sum of their squares over n events -- confirm against the caller.
 // NOTE(review): the first line of this definition (return type, qualified name and the
 // n parameter) is not present in this listing.
 // NOTE(review): n == 0 is not guarded here; callers must not pass an empty sample.
 // Square-root (RMS) variant, kept for reference.
91  // return TMath::Sqrt(( target2 - target*target/n) / n);
92  return ( target2 - target*target/n) / n;
93
94 }
95
96
97
TMVA::RegressionVariance
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
Definition: RegressionVariance.h:66
n
const Int_t n
Definition: legend1.C:16
TMVA::RegressionVariance::GetSeparationGain
Double_t GetSeparationGain(const Double_t nLeft, const Double_t targetLeft, const Double_t target2Left, const Double_t nTot, const Double_t targetTot, const Double_t target2Tot)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
Definition: RegressionVariance.cxx:69
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
TMVA::RegressionVariance::GetSeparationIndex
virtual Double_t GetSeparationIndex(const Double_t n, const Double_t target, const Double_t target2)
Separation Index: a simple Variance.
Definition: RegressionVariance.cxx:88
RegressionVariance.h
Double_t
double Double_t
Definition: RtypesCore.h:59
TMath.h