// @(#)root/tmva $Id: TMVA_MethodPDERS.cxx,v 1.6 2006/05/10 07:36:12 brun Exp $
// Author: Andreas Hoecker, Helge Voss, Kai Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis       *
 * Package: TMVA                                                                  *
 * Class  : TMVA_MethodPDERS                                                      *
 *                                                                                *
 * Description:                                                                   *
 *      Multidimensional Likelihood using the "Probability density estimator      *
 *      range search" (PDERS) method suggested in                                 *
 *      T. Carli and B. Koblitz, NIM A 501, 576 (2003)                            *
 *                                                                                *
 *      Implementation (see header file for description)                          *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
 *      Xavier Prudent  <prudent@lapp.in2p3.fr>  - LAPP, France                   *
 *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-KP Heidelberg, Germany     *
 *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland,                                                        *
 *      U. of Victoria, Canada,                                                   *
 *      MPI-KP Heidelberg, Germany,                                               *
 *      LAPP, Annecy, France                                                      *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://mva.sourceforge.net/license.txt)                                       *
 *                                                                                *
 **********************************************************************************/

//_______________________________________________________________________
//
// Multidimensional Likelihood using the "Probability density
// estimator range search" (PDERS) method
//
//_______________________________________________________________________

#include "TMVA_MethodPDERS.h"
#include "TMVA_Tools.h"
#include "TMVA_RootFinder.h"
#include "TFile.h"
#include "TObjString.h"
#include "TMath.h"
#include <stdexcept>

#define DEBUG_TMVA_MethodPDERS             kFALSE
#define TMVA_MethodPDERS_UseFindRoot       kTRUE
#define TMVA_MethodPDERS_UseKernelEstimate kFALSE

using std::vector;

ClassImp(TMVA_MethodPDERS)

//_______________________________________________________________________
 TMVA_MethodPDERS::TMVA_MethodPDERS( TString jobName, vector<TString>* theVariables,
				    TTree* theTree, TString theOption, TDirectory* theTargetDir )
  : TMVA_MethodBase( jobName, theVariables, theTree, theOption, theTargetDir )
{
  InitPDERS();

  // interpret options string
  // default settings (should be defined in fOptions string)
  TList*  list  = TMVA_Tools::ParseFormatLine( fOptions );

  // format and syntax of option string: "VolumeRangeMode:options"
  //
  // where:
  //  VolumeRangeMode - all methods defined in private enum "VolumeRangeMode"
  //  options         - deltaFrac in case of VolumeRangeMode=MinMax/RMS
  //                  - nEventsMin/Max, maxVIterations, scale for VolumeRangeMode=Adaptive

  if (list->GetSize() > 0) {
    TString s = ((TObjString*)list->At(0))->GetString();
    s.ToLower();
    if       (s.Contains("minmax")  ) fVRangeMode = TMVA_MethodPDERS::kMinMax;
    else  if (s.Contains("rms")     ) fVRangeMode = TMVA_MethodPDERS::kRMS;
    else  if (s.Contains("adaptive")) fVRangeMode = TMVA_MethodPDERS::kAdaptive;
    else {
      cout  << "--- " << GetName() << ": Fatal error unknown vRangeType type: "
	    << s << " in first option" << endl;
      throw std::invalid_argument( "Abort" );
    }
  }
  if (list->GetSize() > 1 && (fVRangeMode == kMinMax || fVRangeMode == kRMS)) {
    TString s = ((TObjString*)list->At(0))->GetString();
    fDeltaFrac = atof( ((TObjString*)list->At(1))->GetString() );
  }
  else if (fVRangeMode == kAdaptive) {
    if (list->GetSize() > 1) fNEventsMin     = atoi( ((TObjString*)list->At(1))->GetString() );
    if (list->GetSize() > 2) fNEventsMax     = atoi( ((TObjString*)list->At(2))->GetString() );
    if (list->GetSize() > 3) fMaxVIterations = atoi( ((TObjString*)list->At(3))->GetString() );
    if (list->GetSize() > 4) fInitialScale   = atof( ((TObjString*)list->At(4))->GetString() );
  }

  if (Verbose()) {
    cout << "--- " << GetName()
	 << " <verbose>: interpreted option string: vRangeMethod: '"
	 << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
			  (fVRangeMode == kRMS   ) ? "RMS" : "Adaptive")
	 << "'" << endl;
    if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
      cout << "--- " << GetName() << ": deltaFrac: " << fDeltaFrac << endl;
    else
      cout << "--- " << GetName()
	   << " <verbose>: nEventsMin/Max, maxVIterations, initialScale: "
	   << fNEventsMin << "  " << fNEventsMax
	   << "  " << fMaxVIterations << "  " << fInitialScale << endl;
  }
}

//_______________________________________________________________________
 TMVA_MethodPDERS::TMVA_MethodPDERS( vector<TString> *theVariables,
				    TString theWeightFile,
				    TDirectory* theTargetDir )
  : TMVA_MethodBase( theVariables, theWeightFile, theTargetDir )
{
  InitPDERS();
}

 void TMVA_MethodPDERS::InitPDERS( void )
{
  fMethodName  = "PDERS";
  fMethod      = TMVA_Types::PDERS;
  fTestvar     = fTestvarPrefix+GetMethodName();
  fFin         = NULL;
  fBinaryTreeS = fBinaryTreeB = NULL;

  fgThisPDERS  = this;

  // default options
  fDeltaFrac      = 3.0;
  fVRangeMode     = kAdaptive;

  // special options for Adaptive mode
  fNEventsMin     = 100;
  fNEventsMax     = 200;
  fMaxVIterations = 50;
  fInitialScale   = 0.99;

  fInitializedVolumeEle = kFALSE;
}

//_______________________________________________________________________
 TMVA_MethodPDERS::~TMVA_MethodPDERS( void )
{
  if (NULL != fBinaryTreeS) delete fBinaryTreeS;
  if (NULL != fBinaryTreeB) delete fBinaryTreeB;
  if (NULL != fFin) { fFin->Close(); fFin->Delete(); }
}

//_______________________________________________________________________
 void TMVA_MethodPDERS::Train( void )
{
  //--------------------------------------------------------------
  // this is a dummy training: the preparation work to do is the construction
  // of the binary tree as a pointer chain. It is easier to directly save the
  // trainingTree in the weight file, and to rebuild the binary tree in the
  // test phase from scratch

  // default sanity checks
  if (!CheckSanity()) {
    cout << "--- " << GetName() << ": Error: sanity check failed" << endl;
    throw std::invalid_argument( "Abort" );
  }

  // write weights to file
  WriteWeightsToFile();
}

//_______________________________________________________________________
 Double_t TMVA_MethodPDERS::GetMvaValue( TMVA_Event *e )
{
  // init the size of a volume element using a defined fraction of the
  // volume containing the entire events
  if (fInitializedVolumeEle == kFALSE) {

    fInitializedVolumeEle = kTRUE;
    SetVolumeElement();

    // create binary trees (global member variables) for signal and background
    Int_t nS = 0, nB = 0;
    fBinaryTreeS = new TMVA_BinarySearchTree();
    fBinaryTreeS->Fill( fTrainingTree, fInputVars, nS, 1 );
    fBinaryTreeB = new TMVA_BinarySearchTree();
    fBinaryTreeB->Fill( fTrainingTree, fInputVars, nB, 0 );

    // sanity check
    if (NULL == fBinaryTreeS || NULL == fBinaryTreeB) {
      cout << "--- " << GetName() << ": Error in ::Train: Create(BinaryTree) returned zero "
	   << "binaryTree pointer(s): " << fBinaryTreeS << "  " << fBinaryTreeB << endl;
      throw std::invalid_argument( "Abort" );
    }

    // these are the signal and background scales for the weights
    fScaleS = 1.0/Float_t(nS);
    fScaleB = 1.0/Float_t(nB);
    if (Verbose()) cout << "--- " << GetName() << " <verbose>: signal and background scales: "
			<< fScaleS << " " << fScaleB << endl;
  }

  return this->RScalc( e );
}

//_______________________________________________________________________
 void TMVA_MethodPDERS::SetVolumeElement( void )
{
  // init relative scales
  fDelta = (fNvar > 0) ? new vector<Float_t>( fNvar ) : 0;
  fShift = (fNvar > 0) ? new vector<Float_t>( fNvar ) : 0;
  if (fDelta != 0) {
    for (Int_t ivar=0; ivar<fNvar; ivar++) {
      switch (fVRangeMode) {

      case kRMS:
      case kAdaptive:
	Double_t meanS, meanB, rmsS, rmsB, xmin, xmax;
	TMVA_Tools::ComputeStat( fTrainingTree, (*fInputVars)[ivar],
				 meanS, meanB, rmsS, rmsB, xmin, xmax );
	(*fDelta)[ivar] = (rmsS + rmsB)*0.5*fDeltaFrac;
	if (Verbose())
	  cout << "--- " << GetName() << " <verbose>: delta of var[" << (*fInputVars)[ivar]
	       << "\t]: " << (rmsS + rmsB)*0.5
	       << "\t  |  comp with d|norm|: " << (GetXmaxNorm( ivar ) - GetXminNorm( ivar ))
	       << endl;
	break;

      case kMinMax:
	(*fDelta)[ivar] = (GetXmaxNorm( ivar ) - GetXminNorm( ivar ))*fDeltaFrac;
	break;

      default:
	cout << "--- " << GetName()
	     << ": Error in ::SetVolumeElement: unknown range-set mode: "
	     << fVRangeMode << endl;
	throw std::invalid_argument( "Abort" );
      }

      (*fShift)[ivar] = 0.5; // volume is centered around test value
    }
  }
  else {
    cout << "--- " << GetName() << ": Error: fNvar <= 0: " << fNvar << endl;
    throw std::invalid_argument("");
  }
}

TMVA_MethodPDERS* TMVA_MethodPDERS::fgThisPDERS = NULL;

//_______________________________________________________________________
 Double_t TMVA_MethodPDERS::IGetVolumeContentForRoot( Double_t scale )
{
  return TMVA_MethodPDERS::ThisPDERS()->GetVolumeContentForRoot( scale );
}

//_______________________________________________________________________
 Double_t TMVA_MethodPDERS::GetVolumeContentForRoot( Double_t scale )
{
  // search for events in rescaled volume
  // retrieve the class object

  TMVA_Volume v( *fHelpVolume );
  v.ScaleInterval( scale );
  Double_t cS = GetBinaryTreeSig()->SearchVolume( &v );
  Double_t cB = GetBinaryTreeBkg()->SearchVolume( &v );
  v.Delete();

  return cS + cB;
}

//_______________________________________________________________________
 Float_t TMVA_MethodPDERS::RScalc( TMVA_Event *e )
{
  vector<Double_t> *lb = new vector<Double_t>( fNvar );
  for (Int_t ivar=0; ivar<fNvar; ivar++)
    (*lb)[ivar] = e->GetData(ivar);

  vector<Double_t> *ub = new vector<Double_t>( *lb );
  for (Int_t ivar=0; ivar<fNvar; ivar++) {
    (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
    (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
  }

  TMVA_Volume* volume = new TMVA_Volume( lb, ub );

  Float_t countS = 0;
  Float_t countB = 0;

  // -------------------------------------------------------------------------
  //
  // ==== test of volume search =====
  //
  // #define TMVA_MethodPDERS__countByHand__Debug__
#ifdef  TMVA_MethodPDERS__countByHand__Debug__
  // starting values
  countS = fBinaryTreeS->SearchVolume( volume );
  countB = fBinaryTreeB->SearchVolume( volume );

  Int_t iS = 0, iB = 0;
  for (Int_t ievt_=0; ievt_<fTrainingTree->GetEntries(); ievt_++) {

    Bool_t inV;
    for (Int_t ivar=0; ivar<fNvar; ivar++) {
      Float_t x = TMVA_Tools::GetValue( fTrainingTree, ievt_, (*fInputVars)[ivar] );
      inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
      if (!inV) break;
    }
    if (inV) {
      if ((Int_t)TMVA_Tools::GetValue( fTrainingTree, ievt_, "type" ) == 1)
	iS++;
      else
	iB++;
    }
  }
  cout << "--- " << GetName() << ": debug: my test: S/B: "
       << iS << "  " << iB << endl;
  cout << "--- " << GetName() << ": debug: binTree: S/B: "
       << countS << "  " << countB << endl << endl;
#endif
  // -------------------------------------------------------------------------

  // adaptive volume

  if (fVRangeMode == kAdaptive) {

    // -----------------------------------------------------------------------

    if (TMVA_MethodPDERS_UseFindRoot) {

      fHelpVolume = new TMVA_Volume( *volume );

      TMVA_RootFinder rootFinder( &IGetVolumeContentForRoot, 0.01, 50, 50, 10 );
      Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );

      TMVA_Volume v( *volume );
      v.ScaleInterval( scale );

      // improve PDE by estimate of the kernel within volume
      if (TMVA_MethodPDERS_UseKernelEstimate) {
	std::vector<TMVA_Event*> eventsS;
	std::vector<TMVA_Event*> eventsB;
	fBinaryTreeS->SearchVolume( &v, &eventsS );
	fBinaryTreeB->SearchVolume( &v, &eventsB );
	countS = KernelEstimate( *e, eventsS, v );
	countB = KernelEstimate( *e, eventsB, v );
      }
      else {
	countS = fBinaryTreeS->SearchVolume( &v );
	countB = fBinaryTreeB->SearchVolume( &v );
      }

      v.Delete();

      fHelpVolume->Delete();
      delete fHelpVolume; fHelpVolume = NULL;

    }
    // -----------------------------------------------------------------------
    else {

      // starting values
      countS = fBinaryTreeS->SearchVolume( volume );
      countB = fBinaryTreeB->SearchVolume( volume );

      Float_t nEventsO = countS + countB;
      Int_t i_=0;
      while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
	volume->ScaleInterval( 1.15 );
	countS = fBinaryTreeS->SearchVolume( volume );
	countB = fBinaryTreeB->SearchVolume( volume );
	nEventsO = countS + countB;
	i_++;
      }
      if (i_ > 50) cout << "--- " << GetName() << ": Warning in event: " << e
			<< ": adaptive volume pre-adjustment reached "
			<< ">50 iterations in while loop (" << i_ << ")" << endl;

      Float_t nEventsN = nEventsO;
      Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
      Float_t scaleO   = 1.0;
      Float_t scaleN   = fInitialScale;
      Float_t scale    = scaleN;

      Float_t cS = countS;
      Float_t cB = countB;

      for (Int_t ic=1; ic<fMaxVIterations; ic++) {
	if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {

	  // search for events in rescaled volume
	  TMVA_Volume* v = new TMVA_Volume( *volume );
	  v->ScaleInterval( scale );
	  cS       = fBinaryTreeS->SearchVolume( v );
	  cB       = fBinaryTreeB->SearchVolume( v );
	  nEventsN = cS + cB;

	  // determine next iteration (linear approximation)
	  if (nEventsN > 1 && nEventsN - nEventsO != 0)
	    if (scaleN - scaleO != 0)
	      scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
	    else
	      scale += (-0.01); // should not actually occur...
	  else
	    scale += 0.5; // use much larger volume

	  // save old scale
	  scaleN   = scale;

	  // take if better (don't accept it if too small number of events)
	  if (TMath::Abs(cS + cB - nEventsE) < TMath::Abs(countS + countB - nEventsE) &&
	      (cS + cB >= fNEventsMin || countS + countB < cS + cB)) {
	    countS = cS; countB = cB;
	  }

	  v->Delete();
	  delete v;
	}
	else break;
      }

      // last sanity check
      nEventsN = countS + countB;
      // include "1" to cover float precision
      if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
	cout << "--- " << GetName() << ": Warning in event " << e
	     << ": adaptive volume adjustment reached "
	     << "max. #iterations (" << fMaxVIterations << ")"
	     << "[ nEvents: " << nEventsN << "  " << fNEventsMin << "  " << fNEventsMax << "]"
	     << endl;
    }

  } // end of adaptive method
  // -----------------------------------------------------------------------

  volume->Delete();
  delete volume;

  if (countS < 1e-20 && countB < 1e-20) return 0.5;
  if (countB < 1e-20) return 1.0;
  if (countS < 1e-20) return 0.0;

  Float_t r = countB*fScaleB/(countS*fScaleS);
  return 1.0/(r + 1.0);
}

//_______________________________________________________________________
 Double_t TMVA_MethodPDERS::KernelEstimate( TMVA_Event& event,
					   vector<TMVA_Event*>& events, TMVA_Volume& v )
{
  // define gaussian sigmas
  Double_t fac = 0.2;
  Double_t *sigma = new Double_t[fNvar];
  for (Int_t ivar=0; ivar<fNvar; ivar++)
    sigma[ivar] = ((*v.fUpper)[ivar] - (*v.fLower)[ivar])*fac;

  Double_t pdfSum = 0;
  for (vector<TMVA_Event*>::iterator iev = events.begin(); iev != events.end(); iev++) {

    Double_t pdf = 1;
    for (Int_t ivar=0; ivar<fNvar; ivar++)
      pdf *= TMath::Gaus( event.GetData(ivar), (*iev)->GetData(ivar), sigma[ivar], kTRUE );

    pdfSum += pdf;
  }
  delete [] sigma;

  return pdfSum;
}

//_______________________________________________________________________
 Float_t TMVA_MethodPDERS::GetError( Float_t countS, Float_t countB,
				    Float_t sumW2S, Float_t sumW2B ) const
{
  Float_t c = fScaleB/fScaleS;
  Float_t d = countS + c*countB; d *= d;

  if (d < 1e-10) return 1; // Error is zero because of B = S = 0

  Float_t f = c*c/d/d;
  Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;

  if (err < 1e-10) return 1; // Error is zero because of B or S = 0

  return sqrt(err);
}

//_______________________________________________________________________
 void TMVA_MethodPDERS::WriteWeightsToFile( void )
{
  // write coefficients to file
  TString fname = GetWeightFileName() + ".root";
  cout << "--- " << GetName() << ": creating weight file: " << fname << endl;
  TFile *fout = new TFile( fname, "RECREATE" );

  // build TList of input variables, and TVectors for min/max
  // NOTE: the latter values are mandatory for the normalisation
  // in the reader application !!!
  TList    lvar;
  TVectorT<double> vmin( fNvar ), vmax( fNvar );
  for (Int_t ivar=0; ivar<fNvar; ivar++) {
    lvar.Add( new TNamed( (*fInputVars)[ivar], TString() ) );
    vmin[ivar] = this->GetXminNorm( ivar );
    vmax[ivar] = this->GetXmaxNorm( ivar );
  }
  // write to file
  lvar.Write();
  vmin.Write( "vmin" );
  vmax.Write( "vmax" );
  lvar.Delete();

  // save other configuration options
  // (best would be to use a TMap here, but found implementation really complicated)
  TVectorT<double> pdersOptions( 6 );
  pdersOptions(0) = (Double_t)fVRangeMode;
  pdersOptions(1) = (Double_t)fDeltaFrac;
  pdersOptions(2) = (Double_t)fNEventsMin;
  pdersOptions(3) = (Double_t)fNEventsMax;
  pdersOptions(4) = (Double_t)fMaxVIterations;
  pdersOptions(5) = (Double_t)fInitialScale;
  pdersOptions.Write( "PdersOptions" );

  // write trainingTree
  // create clone of fTrainingTree in new file
  TObjArrayIter branchIter( fTrainingTree->GetListOfBranches(), kIterForward );
  TBranch*      branch = NULL;
  Float_t      *branchVar[1000]; //please check
  Int_t         theType, jvar = -1;
  while ((branch = (TBranch*)branchIter.Next()) != 0) {

    // note: allowed are only variables with minimum and maximum cut
    //       i.e., no distinct cut regions are supported
    if ((TString)branch->GetName() == "type")
      fTrainingTree->SetBranchAddress( branch->GetName(), &theType );
    else
      fTrainingTree->SetBranchAddress( branch->GetName(), &branchVar[++jvar] );
  }

  fTrainingTree->SetBranchStatus( "*", 1 );
  TTree *trainingTreeClone = fTrainingTree->CloneTree();
  trainingTreeClone->Write( "trainingTree" );

  fout->Close();
  delete fout;
}

//_______________________________________________________________________
 void TMVA_MethodPDERS::ReadWeightsFromFile( void )
{
  // read coefficients from file
  TString fname = GetWeightFileName();
  if (!fname.EndsWith( ".root" )) fname += ".root";

  cout << "--- " << GetName() << ": reading weight file: " << fname << endl;
  fFin = new TFile( fname );

  // build TList of input variables, and TVectors for min/max
  // NOTE: the latter values are mandatory for the normalisation
  // in the reader application !!!
  TList lvar;
  for (Int_t ivar=0; ivar<fNvar; ivar++) {
    // read variable names
    TNamed t;
    t.Read( (*fInputVars)[ivar] );
    // sanity check
    if (t.GetName() != (*fInputVars)[ivar]) {
      cout << "--- " << GetName() << ": Error while reading weight file; "
	   << "unknown variable: " << t.GetName() << " at position: " << ivar << ". "
	   << "Expected variable: " << (*fInputVars)[ivar] << endl;
      throw std::invalid_argument( "Abort" );
    }
  }

  // read vectors
  TVectorT<double> vmin( fNvar ), vmax( fNvar );
  // unfortunatly the more elegant vmin/max.Read( "vmin/max" ) crash in ROOT V4.04.02g
  TVectorT<double> *tmp = (TVectorT<double>*)fFin->Get( "vmin" );
  vmin = *tmp;
  tmp  = (TVectorT<double>*)fFin->Get( "vmax" );
  vmax = *tmp;

  // initialize min/max
  for (Int_t ivar=0; ivar<fNvar; ivar++) {
    this->SetXminNorm( ivar, vmin[ivar] );
    this->SetXmaxNorm( ivar, vmax[ivar] );
  }

  // read other configuration options
  TVectorT<double>* pdersOptions = (TVectorT<double>*)fFin->Get( "PdersOptions" );
  fVRangeMode     = (VolumeRangeMode)(Int_t)(*pdersOptions)(0);
  fDeltaFrac	   = (*pdersOptions)(1);
  fNEventsMin	   = (Int_t)(*pdersOptions)(2);
  fNEventsMax	   = (Int_t)(*pdersOptions)(3);
  fMaxVIterations = (Int_t)(*pdersOptions)(4);
  fInitialScale   = (*pdersOptions)(5);

  // read the trainingTree
  fTrainingTree = (TTree*)fFin->Get( "trainingTree" );

  if (NULL == fTrainingTree) {
    cout << "--- " << GetName() << ": Error while reading 'trainingTree': zero pointer "
	 << endl;
    throw std::invalid_argument( "Abort" );
  }
}

//_______________________________________________________________________
 void  TMVA_MethodPDERS::WriteHistosToFile( void )
{
  cout << "--- " << GetName() << ": write " << GetName()
       <<" special histos to file: " << fBaseDir->GetPath() << endl;
}



ROOT page - Class index - Class Hierarchy - Top of the page

This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.