Logo ROOT   6.08/07
Reference Guide
RegressionVariance.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RegressionVariance *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: Calculate the separation criteria used in regression *
11  * *
12  * There are two things: the Separation Index, and the Separation Gain *
13  * Separation Index: *
14  * Measure of the "Variance" of a sample. *
15  * *
16  * Separation Gain: *
17  * the measure of how the quality of separation of the sample increases *
18  * by splitting the sample e.g. into a "left-node" and a "right-node" *
19  * (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) *
20  * this is then the quality criterion which is optimized for when trying *
21  * to increase the information in the system (making the best selection) *
22  * *
23  * *
24  * Authors (alphabetical): *
25  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
26  * *
27  * Copyright (c) 2005: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * Heidelberg U., Germany *
31  * *
32  * Redistribution and use in source and binary forms, with or without *
33  * modification, are permitted according to the terms listed in LICENSE *
34  * (http://tmva.sourceforge.net/LICENSE) *
35  **********************************************************************************/
36 #include <iostream>
37 #include "TMath.h"
39 
41 
42 ////////////////////////////////////////////////////////////////////////////////
43 /// Separation Gain:
44 /// the measure of how the quality of separation of the sample increases
45 /// by splitting the sample e.g. into a "left-node" and a "right-node"
46 /// (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
47 /// this is then the quality crition which is optimized for when trying
48 /// to increase the information in the system
49 /// for the Regression: as the "Gain is maximised", the RMS (sqrt(variance))
50 /// which is used as a "separation" index should be as small as possible.
51 /// the "figure of merit" here has to be -(rms left+rms-right) or 1/rms...
52 
53 Double_t TMVA::RegressionVariance::GetSeparationGain(const Double_t &nLeft,
54  const Double_t& targetLeft , const Double_t& target2Left ,
55  const Double_t &nTot,
56  const Double_t& targetTot , const Double_t& target2Tot)
57 {
58 
59  if ( nTot==nLeft || nLeft==0 ) return 0.;
60 
61  Double_t parentIndex = nTot * this->GetSeparationIndex(nTot,targetTot,target2Tot);
62  Double_t leftIndex = ( (nTot - nLeft) * this->GetSeparationIndex(nTot-nLeft,targetTot-targetLeft,target2Tot-target2Left) );
63  Double_t rightIndex = nLeft * this->GetSeparationIndex(nLeft,targetLeft,target2Left);
64 
65  // return 1/ (leftIndex + rightIndex);
66  return (parentIndex - leftIndex - rightIndex)/(parentIndex);
67 }
68 
69 ////////////////////////////////////////////////////////////////////////////////
70 /// Separation Index: a simple Variance
71 
73  const Double_t& target , const Double_t& target2)
74 {
75  // return TMath::Sqrt(( target2 - target*target/n) / n);
76  return ( target2 - target*target/n) / n;
77 
78 }
79 
80 
81 
virtual Double_t GetSeparationIndex(const Double_t &n, const Double_t &target, const Double_t &target2)
Separation Index: a simple Variance.
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
Abstract ClassifierFactory template that handles arbitrary types.
const Int_t n
Definition: legend1.C:16