// @(#)root/tmva $Id: TMVA_MethodCuts.cxx,v 1.6 2006/05/09 08:37:06 brun Exp $
// Author: Andreas Hoecker, Peter Speckmayer, Helge Voss, Kai Voss
/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
* Package: TMVA *
* Class : TMVA_MethodCuts *
* *
* Description: *
* Implementation (see header for description) *
* *
* Authors (alphabetical): *
* Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
* Xavier Prudent <prudent@lapp.in2p3.fr> - LAPP, France *
* Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
* Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Germany *
* Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
* *
* Copyright (c) 2005: *
* CERN, Switzerland, *
* U. of Victoria, Canada, *
* MPI-KP Heidelberg, Germany, *
* LAPP, Annecy, France *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted according to the terms listed in LICENSE *
* (http://mva.sourceforge.net/license.txt) *
* *
**********************************************************************************/
//_______________________________________________________________________
//
// Multivariate optimisation of signal efficiency for given background
// efficiency, using rectangular minimum and maximum requirements on
//_______________________________________________________________________
#include <stdio.h>
#include "time.h"
#include "TMVA_MethodCuts.h"
#include "TMVA_GeneticCuts.h"
#include "TMVA_Tools.h"
#include "TMVA_Timer.h"
#include "Riostream.h"
#include "TH1F.h"
#include "TObjString.h"
#define DEBUG_TMVA_MethodCuts kTRUE
ClassImp(TMVA_MethodCuts)
// init global variables
TMVA_MethodCuts* TMVA_MethodCuts::fgThisCuts = NULL;
//_______________________________________________________________________
TMVA_MethodCuts::TMVA_MethodCuts( TString jobName, vector<TString>* theVariables,
TTree* theTree, TString theOption, TDirectory* theTargetDir )
: TMVA_MethodBase( jobName, theVariables, theTree, theOption, theTargetDir )
{
InitCuts();
// ----------------------------------------------------------------------------------
// interpret option string
// format of option string: Method:nRandCuts:Option_var1:...:Option_varn
// "Method" can be:
// - "MC" : Monte-Carlo optimization (recommended)
// - "FitSel": Minuit Fit: "FitSel_Migrad" or "FitSel_Simplex": using event selection
// - "FitPDF": Minuit Fit: "FitPDF_Migrad" or "FitPDF_Simplex" PDF-based
// (only useful for uncorrelated input variables)
// "option_vari" can be
// - "FMax" : ForceMax (the max cut is fixed to maximum of variable i)
// - "FMin" : ForceMin (the min cut is fixed to minimum of variable i)
// - "FSmart": ForceSmart (the min or max cut is fixed to min/max, based on mean value)
// - Adding "All" to "option_vari", eg, "AllFSmart" will use this option for all variables
// - if "option_vari" is empty (== ""), no assumptions on cut min/max are made
// ----------------------------------------------------------------------------------
TList* list = TMVA_Tools::ParseFormatLine( fOptions );
if (list->GetSize()<1) {
fOptions = "MC:10000:";
cout << "--- " << GetName() << ": problems with options string, using default: "
<< fOptions << endl;
list = TMVA_Tools::ParseFormatLine( fOptions );
}
// interpret string
// which optimisation Method
TString s = ((TObjString*)list->At(0))->GetString();
s.ToUpper();
if (s.Contains( "MC" )) fFitMethod = kUseMonteCarlo;
else if (s.Contains( "GA" )) fFitMethod = kUseGeneticAlgorithm;
else {
cout << "--- " << GetName() << ": unknown entry in field 0 of option string: "
<< s << " ==> abort" << endl;
exit(1);
}
if (list->GetSize() > 1) { // options are specified
s = ((TObjString*)list->At(1))->GetString();
s.ToUpper();
if (s.Contains( "EFFSEL" )) fEffMethod = kUseEventSelection; // highly recommended
else if (s.Contains( "EFFPDF" )) fEffMethod = kUsePDFs;
else fEffMethod = kUseEventSelection;
}
// options output
cout << "--- " << GetName() << ": interpret options string: '" << fOptions << "'" << endl;
printf( "--- %s: --> use optimization method: '%s'\n",
GetName(), (fFitMethod == kUseMonteCarlo) ? "Monte Carlo" : "Genetic Algorithm" );
printf( "--- %s: --> use efficiency computation method: '%s'\n",
GetName(), (fEffMethod == kUseEventSelection) ? "Event Selection" : "PDF" );
// -----------------------------------------------------------------------------------
// interpret for MC use
//
switch (fFitMethod) {
case kUseMonteCarlo:
if (list->GetSize() > 2) {
s = ((TObjString*)list->At(2))->GetString();
fNRandCuts = atoi( s );
if (fNRandCuts <= 1) {
cout << "--- " << GetName() << ": invalid number of MC events: " << fNRandCuts
<< " in field 2 of option string: " << s << " ==> abort" << endl;
exit(1);
}
}
cout << "--- " << GetName() << ": generate " << fNRandCuts << " random cut samples"
<< endl;
if (list->GetSize() > 3) { // options are specified
s = ((TObjString*)list->At(3))->GetString();
s.ToUpper();
if (s.Contains( "ALL" )) { // one option sets all the others
FitParameters theFitP = kNotEnforced;
if (s.Contains( "FMAX" )) theFitP = kForceMax;
else if (s.Contains( "FMIN" )) theFitP = kForceMin;
else if (s.Contains( "FSMART" )) theFitP = kForceSmart;
else if (s.Contains( "FVERYSMART" )) theFitP = kForceVerySmart;
else {
cout << "--- " << GetName() << ": unknown fit parameter option "
<< " in field 2 of option string: " << s << " ==> abort" << endl;
exit(1);
}
for (Int_t ivar=0; ivar<fNvar; ivar++) (*fFitParams)[ivar] = theFitP;
if (theFitP != kNotEnforced)
cout << "--- " << GetName() << ": use 'smart' cuts" << endl;
}
else { // individual options
for (Int_t ivar=0; ivar<fNvar; ivar++) {
if (list->GetSize() >= 3+ivar) {
s = ((TObjString*)list->At(2+ivar))->GetString();
s.ToUpper();
FitParameters theFitP = kNotEnforced;
if (s == "" || s == "NOTENFORCED") theFitP = kNotEnforced;
else if (s.Contains( "FMAX" )) theFitP = kForceMax;
else if (s.Contains( "FMIN" )) theFitP = kForceMin;
else if (s.Contains( "FSMART" )) theFitP = kForceSmart;
else if (s.Contains( "FVERYSMART" )) theFitP = kForceVerySmart;
else {
cout << "--- " << GetName() << ": unknown fit parameter option "
<< " in field " << ivar+3 << " (var: " << ivar
<< " of option string: " << s << " ==> abort" << endl;
exit(1);
}
(*fFitParams)[ivar] = theFitP;
if (theFitP != kNotEnforced)
cout << "--- " << GetName() << ": use 'smart' cuts for variable: "
<< "'" << (*fInputVars)[ivar] << "'" << endl;
}
}
}
}
break;
// -----------------------------------------------------------------------------------
// interpret for GA use
//
case kUseGeneticAlgorithm:
if (list->GetSize() > 2) {
s = ((TObjString*)list->At(2))->GetString(); fGa_nsteps = atoi( s );
if (list->GetSize() > 3) {
s = ((TObjString*)list->At(3))->GetString(); fGa_preCalc = atoi( s );
if (list->GetSize() > 4) {
s = ((TObjString*)list->At(4))->GetString(); fGa_SC_steps = atoi( s );
if (list->GetSize() > 5) {
s = ((TObjString*)list->At(5))->GetString(); fGa_SC_offsteps = atoi( s );
if (list->GetSize() > 6) {
s = ((TObjString*)list->At(6))->GetString(); fGa_SC_factor = atof( s );
}
}
}
}
}
break;
default:
cout << "--- " << GetName() << ": Error: unknown method: " << fFitMethod
<< " ==> abort" << endl;
exit(1);
}
if (fFitMethod == kUseMonteCarlo)
printf( "--- %s: --> number of MC events to be generated: %i\n", GetName(), fNRandCuts );
for (Int_t ivar=0; ivar<fNvar; ivar++) {
TString theFitOption = ( ((*fFitParams)[ivar] == kNotEnforced) ? "NotEnforced" :
((*fFitParams)[ivar] == kForceMin ) ? "ForceMin" :
((*fFitParams)[ivar] == kForceMax ) ? "ForceMax" :
((*fFitParams)[ivar] == kForceSmart ) ? "ForceSmart" :
((*fFitParams)[ivar] == kForceVerySmart ) ? "ForceVerySmart" : "other" );
printf( "--- %s: --> option for variable: %s: '%s' (#: %i)\n",
GetName(), (const char*)(*fInputVars)[ivar], (const char*)theFitOption,
(Int_t)(*fFitParams)[ivar] );
}
// ----------------------------------------------------------------------------------
}
//_______________________________________________________________________
TMVA_MethodCuts::TMVA_MethodCuts( vector<TString> *theVariables,
TString theWeightFile,
TDirectory* theTargetDir )
: TMVA_MethodBase( theVariables, theWeightFile, theTargetDir )
{
InitCuts();
}
//_______________________________________________________________________
void TMVA_MethodCuts::InitCuts( void )
{
fMethodName = "Cuts";
fMethod = TMVA_Types::Cuts;
fTestvar = fTestvarPrefix+GetMethodName();
fConstrainType = kConstrainEffS;
fVarHistS = fVarHistB = 0;
fVarHistS_smooth = fVarHistB_smooth = 0;
fVarPdfS = fVarPdfB = 0;
fFitParams = 0;
fEffBvsSLocal = 0;
fBinaryTreeS = fBinaryTreeB = 0;
fEffSMin = 0;
fEffSMax = 0;
// MC defaults
fNRandCuts = 100000;
// GA defaults
fGa_preCalc = 3;
fGa_SC_steps = 10;
fGa_SC_offsteps = 5;
fGa_SC_factor = 0.95;
fGa_nsteps = 30;
fgThisCuts = this;
// vector with fit results
fNpar = 2*fNvar;
fPar0 = new vector<Double_t>( fNpar );
fRangeSign = new vector<Int_t> ( fNvar );
fMeanS = new vector<Double_t>( fNvar );
fMeanB = new vector<Double_t>( fNvar );
fRmsS = new vector<Double_t>( fNvar );
fRmsB = new vector<Double_t>( fNvar );
fXmin = new vector<Double_t>( fNvar );
fXmax = new vector<Double_t>( fNvar );
// get the variable specific options, first initialize default
fFitParams = new vector<FitParameters>( fNvar );
for (Int_t ivar=0; ivar<fNvar; ivar++) (*fFitParams)[ivar] = kNotEnforced;
fTrandom = new TRandom( 0 ); // set seed
fFitMethod = kUseMonteCarlo;
fTestSignalEff = -1;
// create LUT for cuts
fCutMin = new Double_t*[fNvar];
fCutMax = new Double_t*[fNvar];
for (Int_t i=0;i<fNvar;i++) {
fCutMin[i] = new Double_t[fNbins];
fCutMax[i] = new Double_t[fNbins];
}
// init
for (Int_t ivar=0; ivar<fNvar; ivar++) {
for (Int_t ibin=0; ibin<fNbins; ibin++) {
fCutMin[ivar][ibin] = 0;
fCutMax[ivar][ibin] = 0;
}
}
}
//_______________________________________________________________________
TMVA_MethodCuts::~TMVA_MethodCuts( void )
{
if (Verbose()){
cout << "--- TMVA_MethodCuts: Destructor called " << endl;
}
delete fPar0;
delete fRangeSign;
delete fTrandom;
delete fMeanS;
delete fMeanB;
delete fRmsS;
delete fRmsB;
delete fXmin;
delete fXmax;
for (Int_t i=0;i<fNvar;i++) {
if (fCutMin[i] != NULL) delete [] fCutMin[i];
if (fCutMax[i] != NULL) delete [] fCutMax[i];
}
if (NULL != fBinaryTreeS) delete fBinaryTreeS;
if (NULL != fBinaryTreeB) delete fBinaryTreeB;
}
//_______________________________________________________________________
Double_t TMVA_MethodCuts::GetMvaValue( TMVA_Event *e )
{
// evaluation
// sanity check
if (fCutMin == NULL || fCutMax == NULL || fNbins == 0) {
cerr << "--- " << GetName() << "::Eval_Cuts: Fatal Error: fCutMin/Max have zero pointer. "
<< "Did you book Cuts ? ==> abort" << endl;
exit(1);
}
// sanity check
if (fTestSignalEff > 0) {
// get efficiency bin
Int_t ibin = int((fTestSignalEff - fEffSMin)/(fEffSMax - fEffSMin)*Double_t(fNbins));
if (ibin < 0 ) ibin = 0;
if (ibin >= fNbins) ibin = fNbins - 1;
Bool_t passed = kTRUE;
for (size_t ivar=0; ivar<e->GetData().size(); ivar++) {
//please check this statement (machine dependent)
passed *= (e->GetData()[ivar] >= fCutMin[ivar][ibin] && e->GetData()[ivar] <= fCutMax[ivar][ibin]);
}
return (Double_t)passed;
}
else return 0;
}
//_______________________________________________________________________
void TMVA_MethodCuts::Train( void )
{
// trainning method
// perform basic sanity chacks
if (!SanityChecks()) {
cout << "--- " << GetName() << ": Error: Basic sanity checks failed ==> abort"
<< endl;
exit(1);
}
if (fEffMethod == kUsePDFs) CreateVariablePDFs(); // create PDFs for variables
// get background efficiency for which the signal efficiency
// ought to be maximized
fConstrainType = kConstrainEffS;
// create binary trees (global member variables) for signal and background
Int_t dummy;
fBinaryTreeS = new TMVA_BinarySearchTree();
fBinaryTreeS->Fill( fTrainingTree, fInputVars, dummy, 1 );
fBinaryTreeB = new TMVA_BinarySearchTree();
fBinaryTreeB->Fill( fTrainingTree, fInputVars, dummy, 0 );
// init basic statistics
TObjArrayIter branchIter( fTrainingTree->GetListOfBranches(), kIterForward );
TBranch* branch = 0;
Int_t ivar = -1;
//TString branchName[fNvar];
//Float_t branchVar[fNvar];
TString branchName[1000]; //please check
Float_t branchVar[1000]; //please check
Int_t theType;
vector<TH1F*> signalDist, bkgDist;
while ((branch = (TBranch*)branchIter.Next()) != 0) {
// note: allowed are only variables with minimum and maximum cut
// i.e., no distinct cut regions are supported
if ((TString)branch->GetName() == "type") {
fTrainingTree->SetBranchAddress( branch->GetName(), &theType );
}
else {
++ivar;
branchName[ivar] = branch->GetName();
fTrainingTree->SetBranchAddress( branchName[ivar], &branchVar[ivar] );
// determine mean and rms to obtain appropriate starting values
TMVA_Tools::ComputeStat( fTrainingTree, branchName[ivar],
(*fMeanS)[ivar], (*fMeanB)[ivar],
(*fRmsS)[ivar], (*fRmsB)[ivar],
(*fXmin)[ivar], (*fXmax)[ivar] );
// I want to use these distributions later to steer the MC-Method a bit into the
// direction where the difference in the distributions for BKG and Signal are largest
TString name = Form( "sigDistVar%d",ivar );
signalDist.push_back( (TH1F*)TMVA_Tools::projNormTH1F( fTrainingTree, branchName[ivar], name, 50,
(*fXmin)[ivar], (*fXmax)[ivar],
"type==1" ) );
name = Form( "bkgDistVar%d",ivar );
bkgDist.push_back( (TH1F*)TMVA_Tools::projNormTH1F( fTrainingTree, branchName[ivar], name,50,
(*fXmin)[ivar],(*fXmax)[ivar],
"type==0" ) );
if ((*fInputVars)[ivar] != branchName[ivar]) {
cout << "Error in: " << GetName() << "::Train: mismatch in variables ==> abort: "
<< ivar << " " << (*fInputVars)[ivar] << " " << branchName[ivar]
<< endl;
exit(1);
}
}
}
// determine eff(B) versus eff(S) plot
fConstrainType = kConstrainEffS;
Int_t ibin=0;
fEffBvsSLocal = new TH1F( fTestvar + "_effBvsSLocal",
TString(GetName()) + " efficiency of B vs S",
fNbins, 0.0, 1.0 );
// init
for (ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );
// --------------------------------------------------------------------------
if (fFitMethod == kUseMonteCarlo) {
// generate MC cuts
//Double_t cutMin[fNvar];
//Double_t cutMax[fNvar];
Double_t cutMin[1000]; //please check
Double_t cutMax[1000]; //please check
// MC loop
cout << "--- " << GetName() << ": Generating " << fNRandCuts
<< " cycles (random cuts) ... patience please" << endl;
Int_t nBinsFilled=0, nBinsFilledAt=0;
// timing of MC
TMVA_Timer timer( fNRandCuts, GetName() );
for (Int_t imc=0; imc<fNRandCuts; imc++) {
// generate random cuts
for (Int_t ivar=0; ivar<fNvar; ivar++) {
FitParameters fitParam = (*fFitParams)[ivar];
if (fitParam == kForceSmart) {
if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) fitParam = kForceMax;
else fitParam = kForceMin;
}
if (fitParam == kForceMin)
cutMin[ivar] = (*fXmin)[ivar];
else
cutMin[ivar] = fTrandom->Rndm()*((*fXmax)[ivar] - (*fXmin)[ivar]) + (*fXmin)[ivar];
if (fitParam == kForceMax)
cutMax[ivar] = (*fXmax)[ivar];
else
cutMax[ivar] = fTrandom->Rndm()*((*fXmax)[ivar] - cutMin[ivar] ) + cutMin[ivar];
if (fitParam == kForceVerySmart){
// generate random cut parameters gaussian distrubuted around the variable values
// where the difference between signal and background is maximal
// get the variable distributions:
cutMin[ivar] = fTrandom->Rndm()*((*fXmax)[ivar] - (*fXmin)[ivar]) + (*fXmin)[ivar];
cutMax[ivar] = fTrandom->Rndm()*((*fXmax)[ivar] - cutMin[ivar] ) + cutMin[ivar];
// ..... to be continued (Helge)
}
if (cutMax[ivar] < cutMin[ivar]) {
cout << "--- " << GetName() << ": Error in ::Train: mismatch with cuts ==> abort"
<< endl;
exit(1);
}
}
// event loop
Double_t effS = 0, effB = 0;
GetEffsfromSelection( &cutMin[0], &cutMax[0], effS, effB);
// determine bin
Int_t ibinS = (Int_t)(effS*Float_t(fNbins) + 1);
if (ibinS < 1 ) ibinS = 1;
if (ibinS > fNbins) ibinS = fNbins;
// linear extrapolation
// (not done at present --> MC will be slightly biased !
// the bias increases with the bin width)
Double_t effBH = fEffBvsSLocal->GetBinContent( ibinS );
// preliminary best event -> backup
if (effBH < 0 || effBH > effB) {
fEffBvsSLocal->SetBinContent( ibinS, effB );
for (Int_t ivar=0; ivar<fNvar; ivar++) {
fCutMin[ivar][ibinS-1] = cutMin[ivar]; // bin 1 stored in index 0
fCutMax[ivar][ibinS-1] = cutMax[ivar];
}
}
// some output to make waiting less boring
Int_t nout = 1000;
if ((Int_t)imc%nout == 0 || imc == fNRandCuts-1) {
Int_t nbinsF = 0, ibin_;
for (ibin_=0; ibin_<fNbins; ibin_++)
if (fEffBvsSLocal->GetBinContent( ibin_ +1 ) >= 0) nbinsF++;
if (nBinsFilled!=nbinsF) {
nBinsFilled = nbinsF;
nBinsFilledAt = imc;
}
timer.DrawProgressBar( imc );
if (imc == fNRandCuts-1 )
printf( "--- %s: fraction of efficiency bins filled: %3.1f\n",
GetName(), nbinsF/Float_t(fNbins) );
}
} // end of MC loop
if (this->Verbose()){
cout << "--- TMVA_MethodCuts: fraction of filled eff. bins did not increase"
<< " anymore after "<< nBinsFilledAt << " cycles" << endl;
}
// get elapsed time
cout << "--- " << GetName() << ": elapsed time: " << timer.GetElapsedTime()
<< endl;
}
// --------------------------------------------------------------------------
else if (fFitMethod == kUseGeneticAlgorithm) {
// ranges
vector<LowHigh*> ranges;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
(*fRangeSign)[ivar] = +1;
ranges.push_back( new LowHigh( (*fXmin)[ivar], (*fXmax)[ivar] ) );
ranges.push_back( new LowHigh( 0, (*fXmax)[ivar] - (*fXmin)[ivar] ) );
}
TMVA_GeneticCuts *bestResultsStore = new TMVA_GeneticCuts( 0, ranges );
TMVA_GeneticCuts *bestResults = new TMVA_GeneticCuts( 0, ranges );
cout << "--- " << GetName() << ": GA: entree, please be patient ..." << endl;
// timing of MC
TMVA_Timer timer1( fGa_preCalc*fNbins, GetName() );
// precalculation
for (Int_t preCalc = 0; preCalc < fGa_preCalc; preCalc++) {
for (Int_t ibin=1; ibin<=fNbins; ibin++) {
timer1.DrawProgressBar( ibin + preCalc*fNbins );
fEffRef = fEffBvsSLocal->GetBinCenter( ibin );
// ---- perform series of fits to achieve best convergence
// "m_ga_spread" times the number of variables
TMVA_GeneticCuts ga( ranges.size() * 10, ranges );
ga.fPopulation.AddPopulation( &bestResults->fPopulation );
ga.CalculateFitness();
ga.fPopulation.TrimPopulation();
do {
ga.Init();
ga.CalculateFitness();
ga.SpreadControl( fGa_SC_steps, fGa_SC_offsteps, fGa_SC_factor );
} while (!ga.HasConverged( Int_t(fGa_nsteps*0.67), 0.0001 ));
bestResultsStore->fPopulation.GiveHint( ga.fPopulation.GetGenes( 0 )->fFactors );
}
bestResults = bestResultsStore;
bestResultsStore = new TMVA_GeneticCuts( 0, ranges );
}
bestResults->Init();
// main run
cout << "--- " << GetName() << ": GA: start main course "
<< endl;
// timing of MC
TMVA_Timer timer2( fNbins, GetName() );
for (ibin=1; ibin<=fNbins; ibin++) {
timer2.DrawProgressBar( ibin );
fEffRef = fEffBvsSLocal->GetBinCenter( ibin );
// ---- perform series of fits to achieve best convergence
TMVA_GeneticCuts ga( ranges.size() * 10, ranges ); // 10 times the number of variables
ga.fSpread = 0.1;
ga.fPopulation.AddPopulation( &bestResults->fPopulation );
ga.CalculateFitness();
ga.fPopulation.TrimPopulation();
do {
ga.Init();
ga.CalculateFitness();
ga.SpreadControl( fGa_SC_steps, fGa_SC_offsteps, fGa_SC_factor );
} while (!ga.HasConverged( fGa_nsteps, 0.00001 ));
Int_t n;
//Double_t par[2*fNvar];
Double_t par[2000]; //please check
n = 0;
for( vector< Double_t >::iterator vec = ga.fPopulation.GetGenes( 0 )->fFactors.begin();
vec < ga.fPopulation.GetGenes( 0 )->fFactors.end(); vec++ ){
par[n] = (*vec);
n++;
}
Double_t effS = 0, effB = 0;
//Double_t cutMin[fNvar];
//Double_t cutMax[fNvar];
Double_t cutMin[1000]; //please check
Double_t cutMax[1000]; //please check
this->MatchParsToCuts( par, &cutMin[0], &cutMax[0] );
this->GetEffsfromSelection( &cutMin[0], &cutMax[0], effS, effB);
for (Int_t ivar=0; ivar<fNvar; ivar++) {
fCutMin[ivar][ibin-1] = cutMin[ivar]; // bin 1 stored in index 0
fCutMax[ivar][ibin-1] = cutMax[ivar];
}
}
// get elapsed time
cout << "--- " << GetName() << ": GA: elapsed time: " << timer1.GetElapsedTime()
<< endl;
}
// --------------------------------------------------------------------------
else {
cerr << "--- " << GetName() << ": Error: unknown minization method: "
<< fFitMethod << " ==> abort" << endl;
exit(1);
}
// write weights and technical histos to file
WriteWeightsToFile();
WriteHistosToFile();
delete fEffBvsSLocal;
if (fBinaryTreeS) delete fBinaryTreeS;
if (fBinaryTreeB) delete fBinaryTreeB;
}
//_______________________________________________________________________
Double_t TMVA_MethodCuts::ComputeEstimator( Double_t *par, Int_t /*npar*/ )
{
// caution: the npar gives the _free_ parameters
// however: the "par" array contains all parameters
// determine cuts
Double_t effS = 0, effB = 0;
//Double_t cutMin[fNvar];
//Double_t cutMax[fNvar];
Double_t cutMin[1000]; //please check
Double_t cutMax[1000]; //please check
this->MatchParsToCuts( par, &cutMin[0], &cutMax[0] );
// retrieve signal and background efficiencies for given cut
switch (fEffMethod) {
case kUsePDFs:
this->GetEffsfromPDFs( &cutMin[0], &cutMax[0], effS, effB );
break;
case kUseEventSelection:
this->GetEffsfromSelection( &cutMin[0], &cutMax[0], effS, effB);
break;
default:
this->GetEffsfromSelection( &cutMin[0], &cutMax[0], effS, effB);
}
// compute estimator
const Double_t epsilon = 1.0e-06;
Double_t eta;
if (fConstrainType == kConstrainEffS) {
if (TMath::Abs(effS - fEffRef) > 0.001) eta = TMath::Abs(effB) + TMath::Abs(effS - fEffRef)/epsilon;
else eta = TMath::Abs(effB);
}
else if (fConstrainType == kConstrainEffB) {
eta = ( pow( (effB - fEffRef)/epsilon, 1 ) +
pow( 1.0/((effS > 0) ? effS : epsilon), 2 ) );
}
else eta = 0;
return eta;
}
//_______________________________________________________________________
void TMVA_MethodCuts::MatchParsToCuts( Double_t* par,
Double_t* cutMin, Double_t* cutMax )
{
for (Int_t ivar=0; ivar<fNvar; ivar++) {
Int_t ipar = 2*ivar;
cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? par[ipar] : par[ipar] - par[ipar+1];
cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? par[ipar] + par[ipar+1] : par[ipar];
}
}
//_______________________________________________________________________
void TMVA_MethodCuts::MatchCutsToPars( Double_t* par,
Double_t* cutMin, Double_t* cutMax )
{
for (Int_t ivar=0; ivar<fNvar; ivar++) {
Int_t ipar = 2*ivar;
par[ipar] = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
par[ipar+1] = cutMax[ivar] - cutMin[ivar];
}
}
//_______________________________________________________________________
void TMVA_MethodCuts::GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
Double_t& effS, Double_t& effB )
{
effS = 1.0;
effB = 1.0;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
}
}
//_______________________________________________________________________
void TMVA_MethodCuts::GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
Double_t& effS, Double_t& effB)
{
Float_t nTotS = 0, nTotB = 0;
Float_t nSelS = 0, nSelB = 0;
TMVA_Volume* volume = new TMVA_Volume( cutMin, cutMax, fNvar );
nSelS = fBinaryTreeS->SearchVolume( volume );
nSelB = fBinaryTreeB->SearchVolume( volume );
nTotS = Float_t(fBinaryTreeS->GetSumOfWeights());
nTotB = Float_t(fBinaryTreeB->GetSumOfWeights());
delete volume;
// sanity check
if (nTotS == 0 && nTotB == 0) {
cout << "--- " << GetName()
<< ": fatal error in::ComputeEstimator: zero total number of events:"
<< " nTotS, nTotB: " << nTotS << " " << nTotB << " ***"
<< endl;
exit(1);
}
// efficiencies
if (nTotS == 0 ) {
effS = 0;
effB = nSelB/nTotB;
cout << "--- " << GetName()
<< ": Warning in ::ComputeEstimator: zero number of events signal Events:\n";
}
else if ( nTotB == 0) {
effB = 0;
effS = nSelS/nTotS;
cout << "--- " << GetName()
<< ": Warning in ::ComputeEstimator: zero number of events background Events:\n";
}
else {
effS = nSelS/nTotS;
effB = nSelB/nTotB;
}
}
//_______________________________________________________________________
void TMVA_MethodCuts::CreateVariablePDFs( void )
{
// create list of histograms and PDFs
fVarHistS = new vector<TH1*> ( fNvar );
fVarHistB = new vector<TH1*> ( fNvar );
fVarHistS_smooth = new vector<TH1*> ( fNvar );
fVarHistB_smooth = new vector<TH1*> ( fNvar );
fVarPdfS = new vector<TMVA_PDF*>( fNvar );
fVarPdfB = new vector<TMVA_PDF*>( fNvar );
Int_t nsmooth = 0;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
// ---- signal
TString histTitle = (*fInputVars)[ivar] + " signal training";
TString histName = (*fInputVars)[ivar] + "_sig";
TString drawOpt = (*fInputVars)[ivar] + ">>h(";
drawOpt += fNbins;
drawOpt += ")";
// selection
fTrainingTree->Draw( drawOpt, "type==1", "goff" );
(*fVarHistS)[ivar] = (TH1F*)gDirectory->Get("h");
(*fVarHistS)[ivar]->SetName(histName);
(*fVarHistS)[ivar]->SetTitle(histTitle);
// make copy for smoothed histos
(*fVarHistS_smooth)[ivar] = (TH1F*)(*fVarHistS)[ivar]->Clone();
histTitle = (*fInputVars)[ivar] + " signal training smoothed ";
histTitle += nsmooth;
histTitle +=" times";
histName = (*fInputVars)[ivar] + "_sig_smooth";
(*fVarHistS_smooth)[ivar]->SetName(histName);
(*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
// smooth
(*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
// ---- background
histTitle = (*fInputVars)[ivar] + " background training";
histName = (*fInputVars)[ivar] + "_bgd";
drawOpt = (*fInputVars)[ivar] + ">>h(";
drawOpt += fNbins;
drawOpt += ")";
fTrainingTree->Draw( drawOpt, "type==0", "goff" );
(*fVarHistB)[ivar] = (TH1F*)gDirectory->Get("h");
(*fVarHistB)[ivar]->SetName(histName);
(*fVarHistB)[ivar]->SetTitle(histTitle);
// make copy for smoothed histos
(*fVarHistB_smooth)[ivar] = (TH1F*)(*fVarHistB)[ivar]->Clone();
histTitle = (*fInputVars)[ivar]+" background training smoothed ";
histTitle += nsmooth;
histTitle +=" times";
histName = (*fInputVars)[ivar]+"_bgd_smooth";
(*fVarHistB_smooth)[ivar]->SetName(histName);
(*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
// smooth
(*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
// create PDFs
(*fVarPdfS)[ivar] = new TMVA_PDF( (*fVarHistS_smooth)[ivar], TMVA_PDF::kSpline2 );
(*fVarPdfB)[ivar] = new TMVA_PDF( (*fVarHistB_smooth)[ivar], TMVA_PDF::kSpline2 );
}
}
//_______________________________________________________________________
Bool_t TMVA_MethodCuts::SanityChecks( void )
{
// basic checks to ensure that assumptions on variable order are satisfied
Bool_t isOK = kTRUE;
TObjArrayIter branchIter( fTrainingTree->GetListOfBranches(), kIterForward );
TBranch* branch = 0;
Int_t ivar = -1;
while ((branch = (TBranch*)branchIter.Next()) != 0) {
TString branchName = branch->GetName();
if (branchName != "type") {
// determine mean and rms to obtain appropriate starting values
ivar++;
if ((*fInputVars)[ivar] != branchName) {
cout << "Error in: " << GetName() << "::SanityChecks: mismatch in variables ==> abort"
<< endl;
isOK = kFALSE;
}
}
}
return isOK;
}
//_______________________________________________________________________
void TMVA_MethodCuts::CheckErr( TString cmd, Int_t errFlag )
{
if (errFlag != 0)
cout << "--- " << GetName() << ": Problem in ::cmd: " << cmd
<< " / error code: " << errFlag <<" *** " << endl;
}
//_______________________________________________________________________
void TMVA_MethodCuts::WriteWeightsToFile( void )
{
// write weights to file
// though we could write the root effBvsS histogram directly, we
// prefer here to put everything into a human-readable form
TString fname = GetWeightFileName();
cout << "--- " << GetName() << ": creating weight file: " << fname << endl;
ofstream fout( fname );
if (!fout.good( )) { // file not found --> Error
cout << "--- " << GetName() << ": Error in ::WriteWeightsToFile: "
<< "unable to open output weight file: " << fname << endl;
exit(1);
}
// write variable names and min/max
// NOTE: the latter values are mandatory for the normalisation
// in the reader application !!!
fout << this->GetMethodName() <<endl;
fout << "NVars= " << fNvar <<endl;
Int_t ivar;
for (ivar=0; ivar<fNvar; ivar++) {
TString var = (*fInputVars)[ivar];
fout << var << " " << GetXminNorm( var ) << " " << GetXmaxNorm( var )
<< endl;
}
// first the dimensions
fout << "OptimisationMethod " << "nRandCuts " << "nbins:" << endl;
fout << ((fEffMethod == kUseEventSelection) ? "Fit-EventSelection" :
(fEffMethod == kUsePDFs) ? "Fit-PDF" : "Monte-Carlo") << " " ;
fout << fNRandCuts << " ";
fout << fNbins << endl;
// fout << endl;
fout << "the optimised cuts for " << fNvar << " variables" << endl;
fout << "format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0]"
<< " ... cutMin[ivar=n-1] cutMax[ivar=n-1]" << endl;
Int_t ibin;
for (ibin=0; ibin<fNbins; ibin++) {
fout << setw(4) << ibin+1 << " "
<< setw(8)<< fEffBvsSLocal->GetBinCenter( ibin +1 ) << " "
<< setw(8)<< fEffBvsSLocal->GetBinContent( ibin +1 ) << " ";
for (ivar=0; ivar<fNvar; ivar++)
fout <<setw(10)<< fCutMin[ivar][ibin] << " " << setw(10) << fCutMax[ivar][ibin] << " ";
fout << endl;
}
}
//_______________________________________________________________________
void TMVA_MethodCuts::ReadWeightsFromFile( void )
{
// read weights from file
// though we could write the root effBvsS histogram directly, we
// prefer here to put everything into a human-readable form
TString fname = GetWeightFileName();
cout << "--- " << GetName() << ": reading weight file: " << fname << endl;
ifstream fin( fname );
if (!fin.good( )) { // file not found --> Error
cout << "--- " << GetName() << ": Error in ::ReadWeightsFromFile: "
<< "unable to open input file: " << fname << endl;
exit(1);
}
// read variable names and min/max
// NOTE: the latter values are mandatory for the normalisation
// in the reader application !!!
TString var, dummy;
Double_t xmin, xmax;
fin >> dummy;
this->SetMethodName(dummy);
fin >> dummy >> fNvar;
Int_t ivar;
for (ivar=0; ivar<fNvar; ivar++) {
fin >> var >> xmin >> xmax;
// sanity check
if (var != (*fInputVars)[ivar]) {
cout << "--- " << GetName() << ": Error while reading weight file; "
<< "unknown variable: " << var << " at position: " << ivar << ". "
<< "Expected variable: " << (*fInputVars)[ivar] << " ==> abort"
<< endl;
exit(1);
}
// set min/max
this->SetXminNorm( ivar, xmin );
this->SetXmaxNorm( ivar, xmax );
}
// first the dimensions
fin >> dummy >> dummy >> dummy;
fin >> dummy >> fNRandCuts >> fNbins;
cout << "--- " << GetName() << ": Read cuts from "<< fNRandCuts << " MC events"
<< " in " << fNbins << " efficiency bins " << endl;
fin >> dummy >> dummy >> dummy >> dummy >>fNvar>>dummy ;
char buffer[200];
fin.getline(buffer,200);
fin.getline(buffer,200);
// read histogram and cuts
Int_t ibin;
Int_t tmpbin;
Float_t tmpeffS, tempeffB;
for (ibin=0; ibin<fNbins; ibin++) {
fin >> tmpbin >> tmpeffS >> tempeffB;
if (ibin == 0 ) fEffSMin = tmpeffS;
if (ibin == fNbins-1) fEffSMax = tmpeffS;
for (ivar=0; ivar<fNvar; ivar++) {
fin >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
}
}
}
//_______________________________________________________________________
void TMVA_MethodCuts::WriteHistosToFile( void )
{
cout << "--- " << GetName() << ": write " << GetName()
<< " special histos to file: " << fBaseDir->GetPath() << endl;
fEffBvsSLocal->Write();
// save reference histograms to file
if (fEffMethod == kUsePDFs) {
gDirectory->GetListOfKeys()->Print();
fBaseDir->mkdir(GetName()+GetMethodName())->cd();
for (Int_t ivar=0; ivar<fNvar; ivar++) {
(*fVarHistS)[ivar]->Write();
(*fVarHistB)[ivar]->Write();
(*fVarHistS_smooth)[ivar]->Write();
(*fVarHistB_smooth)[ivar]->Write();
(*fVarPdfS)[ivar]->GetPDFHist()->Write();
(*fVarPdfB)[ivar]->GetPDFHist()->Write();
}
}
}
//_______________________________________________________________________
void TMVA_MethodCuts::TestInitLocal( TTree *theTree )
{
cout << "--- " << GetName() << ": called TestInitLocal " <<endl;
// create binary trees (global member variables) for signal and background
Int_t dummy;
fBinaryTreeS = new TMVA_BinarySearchTree();
fBinaryTreeS->Fill( theTree, fInputVars, dummy, 1 );
fBinaryTreeB = new TMVA_BinarySearchTree();
fBinaryTreeB->Fill( theTree, fInputVars, dummy, 0 );
}
//_______________________________________________________________________
Double_t TMVA_MethodCuts::GetEfficiency( TString theString, TTree * /*theTree*/ )
{
// parse input string for required background efficiency
TList* list = TMVA_Tools::ParseFormatLine( theString );
// sanity check
if (list->GetSize() != 2) {
cout << "--- " << GetName() << ": Error in::GetEfficiency: wrong number of arguments"
<< " in string: " << theString
<< " | required format, e.g., Efficiency:0.05" << endl;
return -1;
}
// that will be the value of the efficiency retured (does not affect
// the efficiency-vs-bkg plot which is done anyway.
Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
if (Verbose())
cout << "--- " << GetName() << "::GetEfficiency <verbose>: compute eff(S) at eff(B) = "
<< effBref << endl;
// first round ? --> create histograms
if ( fEffBvsS == NULL || fRejBvsS == NULL) {
// there is no really good equivalent to the fEffS; fEffB (efficiency vs cutvalue)
// for the "Cuts" method (unless we had only one cut). Maybe later I might add here
// histograms for each of the cuts...but this would require also a change in the
// base class, and it is not really necessary, as we get exactly THIS info from the
// "evaluateAllVariables" anyway.
// now create efficiency curve: background versus signal
// if (NULL != fEffBvsS)fEffBvsS->Delete();
// if (NULL != fRejBvsS)fRejBvsS->Delete();
if (NULL != fEffBvsS)delete fEffBvsS;
if (NULL != fRejBvsS)delete fRejBvsS;
fEffBvsS = new TH1F( fTestvar + "_effBvsS", fTestvar + "", fNbins, 0, 1 );
fRejBvsS = new TH1F( fTestvar + "_rejBvsS", fTestvar + "", fNbins, 0, 1 );
// use root finder
// make the background-vs-signal efficiency plot
for (Int_t bini=1; bini<=fNbins; bini++) {
//Double_t tmpCutMin[nvar], tmpCutMax[nvar];
Double_t tmpCutMin[1000], tmpCutMax[1000]; //please check
for (Int_t ivar=0; ivar <fNvar; ivar++){
tmpCutMin[ivar] = fCutMin[ivar][bini-1];
tmpCutMax[ivar] = fCutMax[ivar][bini-1];
}
// find cut value corresponding to a given signal efficiency
Double_t effS, effB;
this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
// and fill histograms
fEffBvsS->SetBinContent( bini, effB );
fRejBvsS->SetBinContent( bini, 1.0-effB );
}
// create splines for histogram
fGrapheffBvsS = new TGraph( fEffBvsS );
fSpleffBvsS = new TMVA_TSpline1( "effBvsS", fGrapheffBvsS );
}
// must exist...
if (NULL == fSpleffBvsS) return 0.0;
// now find signal efficiency that corresponds to required background efficiency
Double_t effS, effB, effS_ = 0, effB_ = 0;
Int_t nbins_ = 1000;
// loop over efficiency bins until the background eff. matches the requirement
for (Int_t bini=1; bini<=nbins_; bini++) {
// get corresponding signal and background efficiencies
effS = (bini - 0.5)/Float_t(nbins_);
effB = fSpleffBvsS->Eval( effS );
// find signal efficiency that corresponds to required background efficiency
if ((effB - effBref)*(effB_ - effBref) < 0) break;
effS_ = effS;
effB_ = effB;
}
return fEffSatB = 0.5*(effS + effS_);
}
ROOT page - Class index - Class Hierarchy - Top of the page
This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.