Logo ROOT  
Reference Guide
RegressionVariance.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RegressionVariance *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: Calculate the separation criteria used in regression *
11 * *
12 * There are two things: the Separation Index, and the Separation Gain *
13 * Separation Index: *
14 * Measure of the "Variance" of a sample. *
15 * *
16 * Separation Gain: *
17 * the measure of how the quality of separation of the sample increases *
18 * by splitting the sample e.g. into a "left-node" and a "right-node" *
19 * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) *
20 * this is then the quality criterion which is optimized when trying *
21 * to increase the information in the system (making the best selection) *
22 * *
23 * *
24 * Authors (alphabetical): *
25 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
26 * *
27 * Copyright (c) 2005: *
28 * CERN, Switzerland *
29 * U. of Victoria, Canada *
30 * Heidelberg U., Germany *
31 * *
32 * Redistribution and use in source and binary forms, with or without *
33 * modification, are permitted according to the terms listed in LICENSE *
34 * (http://tmva.sourceforge.net/LICENSE) *
35 **********************************************************************************/
36#include <iostream>
37#include "TMath.h"
39
41
42/*! \class TMVA::RegressionVariance
43\ingroup TMVA
44Calculate the "SeparationGain" for Regression analysis
45separation criteria used in various training algorithms
46
47There are two things: the Separation Index, and the Separation Gain
48Separation Index:
49Measure of the "Variance" of a sample.
50
51Separation Gain:
52the measure of how the quality of separation of the sample increases
53by splitting the sample e.g. into a "left-node" and a "right-node"
54(N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
55this is then the quality criterion which is optimized when trying
56to increase the information in the system (making the best selection)
57*/
58
59////////////////////////////////////////////////////////////////////////////////
60/// Separation Gain:
61/// the measure of how the quality of separation of the sample increases
62/// by splitting the sample e.g. into a "left-node" and a "right-node"
63/// (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
64/// this is then the quality criterion which is optimized when trying
65/// to increase the information in the system.
66/// For the regression: as the "Gain" is maximised, the RMS (sqrt(variance))
67/// which is used as a "separation" index should be as small as possible.
68/// The "figure of merit" here has to be -(rms_left + rms_right) or 1/rms...
69
71 const Double_t targetLeft, const Double_t target2Left,
72 const Double_t nTot,
73 const Double_t targetTot, const Double_t target2Tot)
74{
75
76 if ( nTot==nLeft || nLeft==0 ) return 0.;
77
78 Double_t parentIndex = nTot * this->GetSeparationIndex(nTot,targetTot,target2Tot);
79 Double_t leftIndex = ( (nTot - nLeft) * this->GetSeparationIndex(nTot-nLeft,targetTot-targetLeft,target2Tot-target2Left) );
80 Double_t rightIndex = nLeft * this->GetSeparationIndex(nLeft,targetLeft,target2Left);
81
82 // return 1/ (leftIndex + rightIndex);
83 return (parentIndex - leftIndex - rightIndex)/(parentIndex);
84}
85
86////////////////////////////////////////////////////////////////////////////////
87/// Separation Index: a simple Variance
88
90 const Double_t target, const Double_t target2)
91{
92 // return TMath::Sqrt(( target2 - target*target/n) / n);
93 return ( target2 - target*target/n) / n;
94
95}
96
97
98
double Double_t
Definition: RtypesCore.h:55
#define ClassImp(name)
Definition: Rtypes.h:365
Calculate the "SeparationGain" for Regression analysis separation criteria used in various training a...
virtual Double_t GetSeparationIndex(const Double_t n, const Double_t target, const Double_t target2)
Separation Index: a simple Variance.
Double_t GetSeparationGain(const Double_t nLeft, const Double_t targetLeft, const Double_t target2Left, const Double_t nTot, const Double_t targetTot, const Double_t target2Tot)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
const Int_t n
Definition: legend1.C:16