// @(#)root/tmva $Id$   
// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : RegressionVariance                                                    *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description: Calculate the separation criteria used in regression              *
 *                                                                                *
 *          There are two things: the Separation Index, and the Separation Gain   *
 *          Separation Index:                                                     *
 *          Measure of the "Variance" of a sample.                                *
 *                                                                                *
 *          Separation Gain:                                                      *
 *          the measure of how the quality of separation of the sample increases  *
 *          by splitting the sample e.g. into a "left-node" and a "right-node"    *
 *          (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)  *
 *          this is then the quality criterion which is optimized when trying     *
 *          to increase the information in the system (making the best selection) *
 *                                                                                *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         * 
 *      U. of Victoria, Canada                                                    * 
 *      Heidelberg U., Germany                                                    * 
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/
#include <iostream>
#include "TMath.h"
#include "TMVA/RegressionVariance.h"

ClassImp(TMVA::RegressionVariance)

//_______________________________________________________________________
Double_t TMVA::RegressionVariance::GetSeparationGain(const Double_t &nLeft, 
                                                     const Double_t& targetLeft , const Double_t& target2Left , 
                                                     const Double_t &nTot, 
                                                     const Double_t& targetTot , const Double_t& target2Tot)
{
   // Separation Gain:
   // the measure of how the quality of separation of the sample increases
   // by splitting the sample e.g. into a "left-node" and a "right-node"
   // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
   // this is then the quality criterion which is optimized for when trying
   // to increase the information in the system.
   // For the regression the index is the variance returned by
   // GetSeparationIndex(), and the value returned here is the gain
   // normalised to the parent term:
   //    (N*Index_parent - N_left*Index_left - N_right*Index_right) / (N*Index_parent)
   //
   // Arguments:
   //    nLeft, targetLeft, target2Left : n, target and target2 of the "left"
   //                                     sub-sample, as passed on to GetSeparationIndex()
   //    nTot,  targetTot,  target2Tot  : the same three quantities for the parent sample
   // The "right" sub-sample is not passed in; it is obtained as the
   // difference (parent - left) of each of the three quantities.
   //
   // Returns 0 if the split leaves one side empty (nLeft==0 or nLeft==nTot)
   // or if the parent term is exactly zero (nothing left to gain).

   // a split that puts everything on one side cannot gain anything
   if  ( nTot==nLeft || nLeft==0 ) return 0.;

   const Double_t nRight = nTot - nLeft;

   const Double_t parentIndex = nTot   * this->GetSeparationIndex(nTot,targetTot,target2Tot);
   const Double_t rightIndex  = nRight * this->GetSeparationIndex(nRight,targetTot-targetLeft,target2Tot-target2Left);
   const Double_t leftIndex   = nLeft  * this->GetSeparationIndex(nLeft,targetLeft,target2Left);

   // a parent sample without any spread would give 0/0 (NaN) below;
   // report "no gain" instead, as for the degenerate splits above
   if ( parentIndex == 0. ) return 0.;

   return (parentIndex - rightIndex - leftIndex)/(parentIndex);   
}

//_______________________________________________________________________
Double_t TMVA::RegressionVariance::GetSeparationIndex(const Double_t& n, 
                                                      const Double_t& target , const Double_t& target2)
{
   // Separation Index:  a simple Variance
   //
   // Evaluates ( target2 - target*target/n ) / n, which is the variance
   // of the sample when "target" is the sum and "target2" the sum of
   // squares of the n target values. The plain variance is returned,
   // not its square root (RMS). No protection against n == 0 is applied.

   const Double_t squaredSumOverN = target*target/n;
   const Double_t spread          = target2 - squaredSumOverN;

   return spread / n;
}



 RegressionVariance.cxx:1
 RegressionVariance.cxx:2
 RegressionVariance.cxx:3
 RegressionVariance.cxx:4
 RegressionVariance.cxx:5
 RegressionVariance.cxx:6
 RegressionVariance.cxx:7
 RegressionVariance.cxx:8
 RegressionVariance.cxx:9
 RegressionVariance.cxx:10
 RegressionVariance.cxx:11
 RegressionVariance.cxx:12
 RegressionVariance.cxx:13
 RegressionVariance.cxx:14
 RegressionVariance.cxx:15
 RegressionVariance.cxx:16
 RegressionVariance.cxx:17
 RegressionVariance.cxx:18
 RegressionVariance.cxx:19
 RegressionVariance.cxx:20
 RegressionVariance.cxx:21
 RegressionVariance.cxx:22
 RegressionVariance.cxx:23
 RegressionVariance.cxx:24
 RegressionVariance.cxx:25
 RegressionVariance.cxx:26
 RegressionVariance.cxx:27
 RegressionVariance.cxx:28
 RegressionVariance.cxx:29
 RegressionVariance.cxx:30
 RegressionVariance.cxx:31
 RegressionVariance.cxx:32
 RegressionVariance.cxx:33
 RegressionVariance.cxx:34
 RegressionVariance.cxx:35
 RegressionVariance.cxx:36
 RegressionVariance.cxx:37
 RegressionVariance.cxx:38
 RegressionVariance.cxx:39
 RegressionVariance.cxx:40
 RegressionVariance.cxx:41
 RegressionVariance.cxx:42
 RegressionVariance.cxx:43
 RegressionVariance.cxx:44
 RegressionVariance.cxx:45
 RegressionVariance.cxx:46
 RegressionVariance.cxx:47
 RegressionVariance.cxx:48
 RegressionVariance.cxx:49
 RegressionVariance.cxx:50
 RegressionVariance.cxx:51
 RegressionVariance.cxx:52
 RegressionVariance.cxx:53
 RegressionVariance.cxx:54
 RegressionVariance.cxx:55
 RegressionVariance.cxx:56
 RegressionVariance.cxx:57
 RegressionVariance.cxx:58
 RegressionVariance.cxx:59
 RegressionVariance.cxx:60
 RegressionVariance.cxx:61
 RegressionVariance.cxx:62
 RegressionVariance.cxx:63
 RegressionVariance.cxx:64
 RegressionVariance.cxx:65
 RegressionVariance.cxx:66
 RegressionVariance.cxx:67
 RegressionVariance.cxx:68
 RegressionVariance.cxx:69
 RegressionVariance.cxx:70
 RegressionVariance.cxx:71
 RegressionVariance.cxx:72
 RegressionVariance.cxx:73
 RegressionVariance.cxx:74
 RegressionVariance.cxx:75
 RegressionVariance.cxx:76
 RegressionVariance.cxx:77
 RegressionVariance.cxx:78
 RegressionVariance.cxx:79
 RegressionVariance.cxx:80
 RegressionVariance.cxx:81