//Begin_Html
/*
Multivariate optimisation of signal efficiency for given background
efficiency, applying rectangular minimum and maximum requirements.
<p>
Also implemented is a "decorrelated/diagonalized cuts approach",
which improves over the uncorrelated cuts ansatz by
transforming linearly the input variables into a diagonal space,
using the square-root of the covariance matrix.
<p>
<font size="-1">
Other optimisation criteria, such as maximising the signal significance-
squared, S^2/(S+B), with S and B being the signal and background yields,
correspond to a particular point in the optimised background rejection
versus signal efficiency curve. This working point requires the knowledge
of the expected yields, which is not the case in general. Note also that
for rare signals, Poissonian statistics should be used, which modifies
the significance criterion.
</font>
<p>
The rectangular cut of a volume in the variable space is performed using
a binary tree to sort the training events. This provides a significant
reduction in computing time (up to several orders of magnitudes, depending
on the complexity of the problem at hand).
<p>
Technically, optimisation is achieved in TMVA by two methods:
<ol>
<li>Monte Carlo generation using uniform priors for the lower cut value,
and the cut width, thrown within the variable ranges.
<li>A Genetic Algorithm (GA) searches for the optimal ("fittest") cut sample.
The GA is configurable by many external settings through the option
string. For difficult cases (such as many variables), some tuning
may be necessary to achieve satisfying results
</ol>
<p>
<font size="-1">
Attempts to use Minuit fits (Simplex or Migrad) instead have not shown
superior results, and often failed due to convergence at local minima.
</font>
<p>
The tests we have performed so far showed that in generic applications,
the GA is superior to MC sampling, and hence GA is the default method.
It is worthwhile trying both anyway.
<b>Decorrelated (or "diagonalized") Cuts</b>
<p>
See class description for Method Likelihood for a detailed explanation.
*/
//End_Html
#include <stdio.h>
#include "time.h"
#include "Riostream.h"
#include "TH1F.h"
#include "TObjString.h"
#ifndef ROOT_TMVA_MethodCuts
#include "TMVA/MethodCuts.h"
#endif
#ifndef ROOT_TMVA_GeneticCuts
#include "TMVA/GeneticCuts.h"
#endif
#ifndef ROOT_TMVA_SimulatedAnnealingCuts
#include "TMVA/SimulatedAnnealingCuts.h"
#endif
#ifndef ROOT_TMVA_Tools
#include "TMVA/Tools.h"
#endif
#ifndef ROOT_TMVA_Timer
#include "TMVA/Timer.h"
#endif
ClassImp(TMVA::MethodCuts)
;
TMVA::MethodCuts* TMVA::MethodCuts::fgThisCuts = NULL;
//_______________________________________________________________________
// standard constructor used for training: initialises the data members,
// then declares, parses and processes the configuration option string
TMVA::MethodCuts::MethodCuts( TString jobName, TString methodTitle, DataSet& theData,
TString theOption, TDirectory* theTargetDir )
: TMVA::MethodBase( jobName, methodTitle, theData, theOption, theTargetDir )
{
InitCuts();
DeclareOptions();
ParseOptions();
ProcessOptions();
}
//_______________________________________________________________________
// constructor used when the cuts are read back from a weight file;
// options are declared but NOT processed here (no ParseOptions/ProcessOptions)
TMVA::MethodCuts::MethodCuts( DataSet& theData,
TString theWeightFile,
TDirectory* theTargetDir )
: TMVA::MethodBase( theData, theWeightFile, theTargetDir )
{
InitCuts();
DeclareOptions();
}
//_______________________________________________________________________
void TMVA::MethodCuts::InitCuts( void )
{
   // common initialisation called by all constructors: sets method identity,
   // zeroes all owned pointers and allocates the per-variable working arrays
   SetMethodName( "Cuts" );
   SetMethodType( TMVA::Types::kCuts );
   SetTestvarName();

   fConstrainType   = kConstrainEffS;
   fVarHistS        = fVarHistB        = 0;
   fVarHistS_smooth = fVarHistB_smooth = 0;
   fVarPdfS         = fVarPdfB         = 0;
   fFitParams       = 0;
   fEffBvsSLocal    = 0;
   fBinaryTreeS     = fBinaryTreeB     = 0;
   fEffSMin         = 0;
   fEffSMax         = 0;
   fTrainEffBvsS    = 0;
   fTrainRejBvsS    = 0;

   fgThisCuts = this;

   // two fit parameters per input variable: cut position and cut width
   fNpar      = 2*GetNvar();

   fRangeSign = new vector<Int_t>   ( GetNvar() );
   fMeanS     = new vector<Double_t>( GetNvar() );
   fMeanB     = new vector<Double_t>( GetNvar() );
   fRmsS      = new vector<Double_t>( GetNvar() );
   fRmsB      = new vector<Double_t>( GetNvar() );
   fXmin      = new vector<Double_t>( GetNvar() );
   fXmax      = new vector<Double_t>( GetNvar() );

   // by default no cut property is enforced on any variable
   fFitParams = new vector<EFitParameters>( GetNvar() );
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) (*fFitParams)[ivar] = kNotEnforced;

   fRandom        = new TRandom( 0 );
   fFitMethod     = kUseMonteCarlo;
   fTestSignalEff = -1;

   // allocate and zero-initialise the cut boundaries, one pair of arrays
   // (min/max, fNbins efficiency bins each) per input variable
   fCutMin = new Double_t*[GetNvar()];
   fCutMax = new Double_t*[GetNvar()];
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      fCutMin[ivar] = new Double_t[fNbins];
      fCutMax[ivar] = new Double_t[fNbins];
      for (Int_t ibin=0; ibin<fNbins; ibin++) {
         fCutMin[ivar][ibin] = 0;
         fCutMax[ivar][ibin] = 0;
      }
   }

   // scratch buffers used by ComputeEstimator
   fTmpCutMin = new Double_t[GetNvar()];
   fTmpCutMax = new Double_t[GetNvar()];
}
//_______________________________________________________________________
// destructor: releases all heap objects owned by this method
TMVA::MethodCuts::~MethodCuts( void )
{
fLogger << kVERBOSE << "Destructor called" << Endl;
delete fRangeSign;
delete fRandom;
delete fMeanS;
delete fMeanB;
delete fRmsS;
delete fRmsB;
delete fXmin;
delete fXmax;
// per-variable cut boundary arrays allocated in InitCuts
for (Int_t i=0;i<GetNvar();i++) {
if (fCutMin[i] != NULL) delete [] fCutMin[i];
if (fCutMax[i] != NULL) delete [] fCutMax[i];
}
delete[] fCutMin;
delete[] fCutMax;
delete[] fTmpCutMin;
delete[] fTmpCutMax;
// the binary search trees are normally deleted at the end of Train();
// guard against double deletion here
if (NULL != fBinaryTreeS) delete fBinaryTreeS;
if (NULL != fBinaryTreeB) delete fBinaryTreeB;
}
//_______________________________________________________________________
// declare the configuration options of the cut optimisation:
// minimisation method (MC/GA/SA), efficiency computation method,
// Monte-Carlo, Genetic-Algorithm and Simulated-Annealing settings
void TMVA::MethodCuts::DeclareOptions()
{
// choice of the minimisation method
DeclareOptionRef(fFitMethodS="MC", "Method", "Minimization Method");
AddPreDefVal(TString("GA"));
AddPreDefVal(TString("SA"));
AddPreDefVal(TString("MC"));
// how efficiencies are computed: event counting or PDF integration
DeclareOptionRef(fEffMethodS = "EffSel", "EffMethod", "Selection Method");
AddPreDefVal(TString("EffSel"));
AddPreDefVal(TString("EffPDF"));
// number of random cut samples for the MC method
// (the explicit assignment below is redundant with the default given
// in the DeclareOptionRef call, but harmless)
fNRandCuts = 100000;
DeclareOptionRef(fNRandCuts=100000, "MC_NRandCuts", "");
// cut property enforced for ALL variables at once ...
DeclareOptionRef(fAllVars="AllNotEnforced", "MC_AllVarProp", "");
AddPreDefVal(TString("AllNotEnforced"));
AddPreDefVal(TString("AllFMax"));
AddPreDefVal(TString("AllFMin"));
AddPreDefVal(TString("AllFSmart"));
AddPreDefVal(TString("AllFVerySmart"));
// ... or per variable (only the first 10 variables are configurable)
for(int i=0; i<10; i++) {
DeclareOptionRef(fAllVarsI[i]="NotEnforced", Form("MC_Var%iProp",i+1), "");
AddPreDefVal(TString("NotEnforced"));
AddPreDefVal(TString("FMax"));
AddPreDefVal(TString("FMin"));
AddPreDefVal(TString("FSmart"));
AddPreDefVal(TString("FVerySmart"));
}
// Genetic Algorithm settings (defaults set before registration)
fGA_cycles = 3;
fGA_SC_steps = 10;
fGA_popSize = 100;
fGA_SC_offsteps = 5;
fGA_SC_factor = 0.95;
fGA_nsteps = 30;
DeclareOptionRef(fGA_nsteps, "GA_nsteps", "");
DeclareOptionRef(fGA_cycles, "GA_cycles", "");
DeclareOptionRef(fGA_popSize, "GA_popSize", "");
DeclareOptionRef(fGA_SC_steps, "GA_SC_steps", "");
DeclareOptionRef(fGA_SC_offsteps, "GA_SC_offsteps", "");
DeclareOptionRef(fGA_SC_factor, "GA_SC_factor", "");
// Simulated Annealing settings
fSA_MaxCalls = 5000000;
fSA_TemperatureGradient = 0.7;
fSA_UseAdaptiveTemperature = kTRUE;
fSA_InitialTemperature = 100000;
fSA_MinTemperature = 500;
fSA_Eps = 1e-04;
fSA_NFunLoops = 5;
fSA_NEps = 4;
DeclareOptionRef(fSA_MaxCalls, "SA_MaxCalls", "");
DeclareOptionRef(fSA_TemperatureGradient, "SA_TemperatureGradient", "");
DeclareOptionRef(fSA_UseAdaptiveTemperature, "SA_UseAdaptiveTemperature", "");
DeclareOptionRef(fSA_InitialTemperature, "SA_InitialTemperature", "");
DeclareOptionRef(fSA_MinTemperature, "SA_MinTemperature", "");
DeclareOptionRef(fSA_Eps, "SA_Eps", "");
DeclareOptionRef(fSA_NFunLoops, "SA_NFunLoops", "");
DeclareOptionRef(fSA_NEps, "SA_NEps", "");
}
//_______________________________________________________________________
void TMVA::MethodCuts::ProcessOptions()
{
   // process the configuration options declared in DeclareOptions:
   // decode the minimisation and efficiency methods, and - for the MC
   // method - the per-variable cut-property enforcement
   MethodBase::ProcessOptions();

   // decode the minimisation method
   if      (fFitMethodS == "MC" ) fFitMethod = kUseMonteCarlo;
   else if (fFitMethodS == "GA" ) fFitMethod = kUseGeneticAlgorithm;
   else if (fFitMethodS == "SA" ) fFitMethod = kUseSimulatedAnnealing;
   else {
      fLogger << kFATAL << "unknown minimization method: " << fFitMethodS << Endl;
   }

   // decode the efficiency computation method
   // NOTE(review): the comparisons use upper-case strings while the declared
   // values are "EffSel"/"EffPDF" - this relies on the option value having
   // been upper-cased during parsing; confirm, otherwise "EffPDF" silently
   // falls through to event selection
   if      (fEffMethodS == "EFFSEL" ) fEffMethod = kUseEventSelection;
   else if (fEffMethodS == "EFFPDF" ) fEffMethod = kUsePDFs;
   else                               fEffMethod = kUseEventSelection;

   // BUG FIX: the message previously printed "Genetic Algorithm" for any
   // non-MC method, i.e. also when Simulated Annealing was selected
   fLogger << kINFO << Form("use optimization method: '%s'\n",
                            (fFitMethod == kUseMonteCarlo      ) ? "Monte Carlo" :
                            (fFitMethod == kUseGeneticAlgorithm) ? "Genetic Algorithm" :
                                                                   "Simulated Annealing" );
   fLogger << kINFO << Form("use efficiency computation method: '%s'\n",
                            (fEffMethod == kUseEventSelection) ? "Event Selection" : "PDF" );

   // Monte-Carlo-specific option handling
   if (fFitMethod == kUseMonteCarlo) {

      if (fNRandCuts <= 1) {
         fLogger << kFATAL << "invalid number of MC events: " << fNRandCuts << Endl;
      }
      fLogger << kINFO << "generate " << fNRandCuts << " random cut samples" << Endl;

      if (fAllVars!="AllNotEnforced") {
         // one cut property enforced for ALL variables
         // (the "AllNotEnforced" case is excluded by the guard above)
         EFitParameters theFitP = kNotEnforced;
         if      (fAllVars == "AllFMax"       ) theFitP = kForceMax;
         else if (fAllVars == "AllFMin"       ) theFitP = kForceMin;
         else if (fAllVars == "AllFSmart"     ) theFitP = kForceSmart;
         else if (fAllVars == "AllFVerySmart" ) theFitP = kForceVerySmart;
         else {
            fLogger << kFATAL << "unknown value \'" << fAllVars
                    << "\' for fit parameter option MC_AllVarProp" << Endl;
         }
         for (Int_t ivar=0; ivar<GetNvar(); ivar++) (*fFitParams)[ivar] = theFitP;
         if (theFitP != kNotEnforced) fLogger << "use 'smart' cuts" << Endl;
      }
      else {
         // per-variable cut properties; only the first 10 variables
         // can be configured individually
         int maxVar = GetNvar()<=10?GetNvar():10;
         for (Int_t ivar=0; ivar<maxVar; ivar++) {
            EFitParameters theFitP = kNotEnforced;
            if (fAllVarsI[ivar] == "" || fAllVarsI[ivar] == "NotEnforced") theFitP = kNotEnforced;
            else if (fAllVarsI[ivar] == "FMax" )       theFitP = kForceMax;
            else if (fAllVarsI[ivar] == "FMin" )       theFitP = kForceMin;
            else if (fAllVarsI[ivar] == "FSmart" )     theFitP = kForceSmart;
            else if (fAllVarsI[ivar] == "FVerySmart" ) theFitP = kForceVerySmart;
            else {
               fLogger << kFATAL << "unknown value \'" << fAllVarsI[ivar]
                       << "\' for fit parameter option " << Form("MC_Var%iProp",ivar+1) << Endl;
            }
            (*fFitParams)[ivar] = theFitP;

            if (theFitP != kNotEnforced)
               fLogger << kINFO << "use 'smart' cuts for variable: "
                       << "'" << (*fInputVars)[ivar] << "'" << Endl;
         }
      }

      fLogger << kINFO << Form("number of MC events to be generated: %i\n", fNRandCuts );
      // summarise the chosen per-variable options
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         TString theFitOption = ( ((*fFitParams)[ivar] == kNotEnforced) ? "NotEnforced" :
                                  ((*fFitParams)[ivar] == kForceMin   ) ? "ForceMin"    :
                                  ((*fFitParams)[ivar] == kForceMax   ) ? "ForceMax"    :
                                  ((*fFitParams)[ivar] == kForceSmart ) ? "ForceSmart"  :
                                  ((*fFitParams)[ivar] == kForceVerySmart ) ? "ForceVerySmart" : "other" );
         fLogger << kINFO << Form("option for variable: %s: '%s' (#: %i)\n",
                                  (const char*)(*fInputVars)[ivar], (const char*)theFitOption,
                                  (Int_t)(*fFitParams)[ivar] );
      }
   }

   // report variable preprocessing
   if (GetPreprocessingMethod() == Types::kDecorrelated)
      fLogger << kINFO << "use decorrelated variable set" << Endl;
   else if (GetPreprocessingMethod() == Types::kPCA)
      fLogger << kINFO << "use principal component preprocessing" << Endl;
}
//_______________________________________________________________________
Double_t TMVA::MethodCuts::GetMvaValue()
{
   // cut evaluation: returns 1.0 if the current event passes the cuts
   // of the efficiency bin selected via fTestSignalEff, 0.0 otherwise
   if (fCutMin == NULL || fCutMax == NULL || fNbins == 0) {
      fLogger << kFATAL << "<Eval_Cuts> fCutMin/Max have zero pointer. "
              << "Did you book Cuts ?" << Endl;
   }

   // without a requested signal-efficiency working point nothing can be evaluated
   if (fTestSignalEff <= 0) return 0;

   // map the requested signal efficiency onto a cut (histogram) bin
   Int_t ibin = Int_t((fTestSignalEff - fEffSMin)/(fEffSMax - fEffSMin)*Double_t(fNbins));
   if      (ibin < 0      ) ibin = 0;
   else if (ibin >= fNbins) ibin = fNbins - 1;

   // event passes only if ALL variables are inside their [min,max] window
   Bool_t passed = kTRUE;
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      Double_t value = Data().Event().GetVal(ivar);
      passed &= (value >= fCutMin[ivar][ibin] && value <= fCutMax[ivar][ibin]);
   }
   return passed ? 1. : 0.;
}
//_______________________________________________________________________
// training: optimises the cut boundaries in fNbins bins of signal
// efficiency, using either Monte-Carlo sampling, a Genetic Algorithm,
// or Simulated Annealing (chosen via the "Method" option)
void TMVA::MethodCuts::Train( void )
{
if (!SanityChecks()) fLogger << kFATAL << "Basic sanity checks failed" << Endl;
// the PDF-based efficiency method needs per-variable PDFs
if (fEffMethod == kUsePDFs) CreateVariablePDFs();
fConstrainType = kConstrainEffS;
// build binary search trees of signal (type 1) and background (type 0)
// training events for fast counting of events inside a cut volume
if (fBinaryTreeS != 0) delete fBinaryTreeS;
if (fBinaryTreeB != 0) delete fBinaryTreeB;
fBinaryTreeS = new TMVA::BinarySearchTree();
fBinaryTreeS->Fill( Data(), Data().GetTrainingTree(), 1, GetPreprocessingMethod(), GetPreprocessingType() );
fBinaryTreeB = new TMVA::BinarySearchTree();
fBinaryTreeB->Fill( Data(), Data().GetTrainingTree(), 0, GetPreprocessingMethod(), GetPreprocessingType() );
// compute mean, RMS and range of every input variable on the training sample
vector<TH1F*> signalDist, bkgDist;
for (UInt_t ivar = 0; ivar < Data().GetNVariables(); ivar++) {
const TString& varname = Data().GetInternalVarName(ivar);
Statistics( TMVA::Types::kTraining, varname,
(*fMeanS)[ivar], (*fMeanB)[ivar],
(*fRmsS)[ivar], (*fRmsB)[ivar],
(*fXmin)[ivar], (*fXmax)[ivar] );
}
Data().ResetCurrentTree();
fConstrainType = kConstrainEffS;
Int_t ibin=0;
// local background-vs-signal efficiency histogram; bins are initialised
// to -0.1 to mark them as "not yet filled"
fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
for (ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );
// ------------------------- Monte-Carlo sampling -------------------------
if (fFitMethod == kUseMonteCarlo) {
Double_t* cutMin = new Double_t[GetNvar()];
Double_t* cutMax = new Double_t[GetNvar()];
fLogger << kINFO << "Generating " << fNRandCuts
<< " cycles (random cuts) in " << GetNvar() << " variables ... patience please" << Endl;
Int_t nBinsFilled=0, nBinsFilledAt=0;
TMVA::Timer timer( fNRandCuts, GetName() );
for (Int_t imc=0; imc<fNRandCuts; imc++) {
// throw a random cut window per variable, honouring the enforced
// cut properties (ForceMin/ForceMax/Smart/VerySmart)
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
EFitParameters fitParam = (*fFitParams)[ivar];
// "smart": force the cut boundary on the side where signal exceeds background
if (fitParam == kForceSmart) {
if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) fitParam = kForceMax;
else fitParam = kForceMin;
}
if (fitParam == kForceMin)
cutMin[ivar] = (*fXmin)[ivar];
else
cutMin[ivar] = fRandom->Rndm()*((*fXmax)[ivar] - (*fXmin)[ivar]) + (*fXmin)[ivar];
if (fitParam == kForceMax)
cutMax[ivar] = (*fXmax)[ivar];
else
cutMax[ivar] = fRandom->Rndm()*((*fXmax)[ivar] - cutMin[ivar] ) + cutMin[ivar];
if (fitParam == kForceVerySmart){
cutMin[ivar] = fRandom->Rndm()*((*fXmax)[ivar] - (*fXmin)[ivar]) + (*fXmin)[ivar];
cutMax[ivar] = fRandom->Rndm()*((*fXmax)[ivar] - cutMin[ivar] ) + cutMin[ivar];
}
if (cutMax[ivar] < cutMin[ivar]) {
fLogger << kFATAL << "<Train>: mismatch with cuts" << Endl;
}
}
// evaluate this cut sample and keep it if it improves the
// background rejection in its signal-efficiency bin
Double_t effS = 0, effB = 0;
GetEffsfromSelection( &cutMin[0], &cutMax[0], effS, effB);
Int_t ibinS = (Int_t)(effS*Float_t(fNbins) + 1);
if (ibinS < 1 ) ibinS = 1;
if (ibinS > fNbins) ibinS = fNbins;
Double_t effBH = fEffBvsSLocal->GetBinContent( ibinS );
if (effBH < 0 || effBH > effB) {
fEffBvsSLocal->SetBinContent( ibinS, effB );
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
fCutMin[ivar][ibinS-1] = cutMin[ivar];
fCutMax[ivar][ibinS-1] = cutMax[ivar];
}
}
// progress reporting every 1000 cycles
Int_t nout = 1000;
if ((Int_t)imc%nout == 0 || imc == fNRandCuts-1) {
Int_t nbinsF = 0, ibin_;
for (ibin_=0; ibin_<fNbins; ibin_++)
if (fEffBvsSLocal->GetBinContent( ibin_ +1 ) >= 0) nbinsF++;
if (nBinsFilled!=nbinsF) {
nBinsFilled = nbinsF;
nBinsFilledAt = imc;
}
timer.DrawProgressBar( imc );
if (imc == fNRandCuts-1 )
fLogger << kINFO << Form( "fraction of efficiency bins filled: %3.1f ",
nbinsF/Float_t(fNbins) ) << Endl;
}
}
fLogger << kVERBOSE << "fraction of filled eff. bins did not increase"
<< " anymore after "<< nBinsFilledAt << " cycles" << Endl;
fLogger << kINFO << "elapsed time: " << timer.GetElapsedTime()
<< " " << Endl;
delete[] cutMin;
delete[] cutMax;
}
// ------------------------- Genetic Algorithm ----------------------------
else if (fFitMethod == kUseGeneticAlgorithm) {
// the GA parameter space: per variable a (position, width) pair
vector<LowHigh_t*> ranges;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
(*fRangeSign)[ivar] = +1;
ranges.push_back( new LowHigh_t( (*fXmin)[ivar], (*fXmax)[ivar] ) );
ranges.push_back( new LowHigh_t( 0, (*fXmax)[ivar] - (*fXmin)[ivar] ) );
}
fLogger << kINFO << "GA: calculation, please be patient ..." << Endl;
TMVA::Timer timer1( fGA_cycles, GetName() );
// several independent GA cycles; each evolves until convergence
for (Int_t cycle = 0; cycle < fGA_cycles; cycle++) {
timer1.DrawProgressBar( cycle );
TMVA::GeneticCuts ga( fGA_popSize, ranges, this );
ga.CalculateFitness();
ga.GetGeneticPopulation().TrimPopulation();
do {
ga.Init();
ga.CalculateFitness();
ga.SpreadControl( fGA_SC_steps, fGA_SC_offsteps, fGA_SC_factor );
} while (!ga.HasConverged( fGA_nsteps, 0.0001 ));
}
fLogger << kINFO << "GA: elapsed time: " << timer1.GetElapsedTime()
<< " " << Endl;
}
// ------------------------- Simulated Annealing --------------------------
else if (fFitMethod == kUseSimulatedAnnealing) {
// parameter space as for the GA; starting point at the range centres
vector<LowHigh_t*> ranges;
vector<Double_t> par;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
(*fRangeSign)[ivar] = +1;
ranges.push_back( new LowHigh_t( (*fXmin)[ivar], (*fXmax)[ivar] ) );
ranges.push_back( new LowHigh_t( 0, (*fXmax)[ivar] - (*fXmin)[ivar] ) );
par.push_back( (ranges[2*ivar]->first + ranges[2*ivar]->second)/2.0 );
par.push_back( (ranges[2*ivar+1]->first + ranges[2*ivar+1]->second)/2.0 );
}
TMVA::SimulatedAnnealingCuts saCuts( ranges );
// configure the annealing schedule from the SA_* options
saCuts.SetMaxCalls ( fSA_MaxCalls );
saCuts.SetTempGrad ( fSA_TemperatureGradient );
saCuts.SetUseAdaptTemp( fSA_UseAdaptiveTemperature );
saCuts.SetInitTemp ( fSA_InitialTemperature );
saCuts.SetMinTemp ( fSA_MinTemperature );
saCuts.SetNumFunLoops ( fSA_NFunLoops );
saCuts.SetAccuracy ( fSA_Eps );
saCuts.SetNEps ( fSA_NEps );
fLogger << kINFO << "SA: entree, please be patient ..." << Endl;
TMVA::Timer timer( fNbins, GetName() );
Double_t* cutMin = new Double_t[GetNvar()];
Double_t* cutMax = new Double_t[GetNvar()];
// one annealing pass per target signal-efficiency bin
for (ibin=1; ibin<=fNbins; ibin++) {
timer.DrawProgressBar( ibin );
fEffRef = fEffBvsSLocal->GetBinCenter( ibin );
Double_t effS = 0, effB = 0;
this->MatchParsToCuts ( par, &cutMin[0], &cutMax[0] );
this->GetEffsfromSelection( &cutMin[0], &cutMax[0], effS, effB);
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
fCutMin[ivar][ibin-1] = cutMin[ivar];
fCutMax[ivar][ibin-1] = cutMax[ivar];
}
}
delete [] cutMin;
delete [] cutMax;
fLogger << kINFO << "SA: elapsed time: " << timer.GetElapsedTime()
<< " " << Endl;
}
else fLogger << kFATAL << "unknown minization method: " << fFitMethod << Endl;
// the search trees are no longer needed after training
if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }
}
//_______________________________________________________________________
// nothing to test here: the cut method is evaluated directly via the
// efficiency computation (see GetEfficiency), so this override is empty
void TMVA::MethodCuts::Test( TTree* )
{
}
//_______________________________________________________________________
Double_t TMVA::MethodCuts::ComputeEstimator( const std::vector<Double_t>& par )
{
   // estimator of the "fitness" of a cut sample, used by the GA/SA fitters;
   // as a side effect, cuts improving the background rejection of their
   // signal-efficiency bin are stored into fCutMin/fCutMax
   Double_t effS = 0, effB = 0;

   // translate the fitter parameters (position, width pairs) into cut windows
   this->MatchParsToCuts( par, &fTmpCutMin[0], &fTmpCutMax[0] );

   // efficiencies via PDF integration or event counting (counting is
   // also the fallback for any unexpected method value)
   if (fEffMethod == kUsePDFs) this->GetEffsfromPDFs( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB );
   else                        this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB );

   // histogram bin corresponding to the achieved signal efficiency
   Int_t ibinS = (Int_t)(effS*Float_t(fNbins) + 1);
   if      (ibinS < 1     ) ibinS = 1;
   else if (ibinS > fNbins) ibinS = fNbins;

   Double_t effBH = fEffBvsSLocal->GetBinContent( ibinS );
   Double_t eta   = ( 1. - (effBH - effB) ) / (1+effS);

   // keep the cuts if the bin is empty (< 0) or the rejection improved
   if (effBH < 0 || effBH > effB) {
      fEffBvsSLocal->SetBinContent( ibinS, effB );
      for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
         fCutMin[ivar][ibinS-1] = fTmpCutMin[ivar];
         fCutMax[ivar][ibinS-1] = fTmpCutMax[ivar];
      }
   }

   return eta;
}
void TMVA::MethodCuts::MatchParsToCuts( const std::vector<Double_t> & par,
Double_t* cutMin, Double_t* cutMax )
{
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
Int_t ipar = 2*ivar;
cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? par[ipar] : par[ipar] - par[ipar+1];
cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? par[ipar] + par[ipar+1] : par[ipar];
}
}
//_______________________________________________________________________
void TMVA::MethodCuts::MatchCutsToPars( Double_t* par,
Double_t* cutMin, Double_t* cutMax )
{
   // inverse of MatchParsToCuts: translate cut boundaries back into the
   // (position, width) fit-parameter representation
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      const Int_t ipar = 2*ivar;
      // position: the boundary selected by the range sign
      if ((*fRangeSign)[ivar] > 0) par[ipar] = cutMin[ivar];
      else                         par[ipar] = cutMax[ivar];
      // width of the cut window
      par[ipar+1] = cutMax[ivar] - cutMin[ivar];
   }
}
//_______________________________________________________________________
void TMVA::MethodCuts::GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
Double_t& effS, Double_t& effB )
{
   // signal and background efficiencies computed as the product, over all
   // input variables, of the PDF integrals inside the cut window
   effS = 1.0;
   effB = 1.0;
   for (Int_t iv=0; iv<GetNvar(); iv++) {
      const Double_t lo = cutMin[iv];
      const Double_t hi = cutMax[iv];
      effS *= (*fVarPdfS)[iv]->GetIntegral( lo, hi );
      effB *= (*fVarPdfB)[iv]->GetIntegral( lo, hi );
   }
}
//_______________________________________________________________________
void TMVA::MethodCuts::GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
Double_t& effS, Double_t& effB)
{
   // compute signal and background efficiencies by counting the training
   // events inside the cut volume, using the binary search trees
   Float_t nTotS = 0, nTotB = 0;
   Float_t nSelS = 0, nSelB = 0;

   // RAII fix: the volume was previously heap-allocated with a naked
   // new/delete pair; a stack object is exception-safe and cannot leak
   TMVA::Volume volume( cutMin, cutMax, GetNvar() );

   // number of selected and total (weighted) events per class
   nSelS = fBinaryTreeS->SearchVolume( &volume );
   nSelB = fBinaryTreeB->SearchVolume( &volume );
   nTotS = Float_t(fBinaryTreeS->GetSumOfWeights());
   nTotB = Float_t(fBinaryTreeB->GetSumOfWeights());

   // sanity check: there must be events of at least one class
   if (nTotS == 0 && nTotB == 0) {
      fLogger << kFATAL << "<GetEffsfromSelection> fatal error in zero total number of events:"
              << " nTotS, nTotB: " << nTotS << " " << nTotB << " ***" << Endl;
   }

   // guard the divisions against an empty class
   if (nTotS == 0 ) {
      effS = 0;
      effB = nSelB/nTotB;
      fLogger << kWARNING << "<ComputeEstimator> zero number of signal events" << Endl;
   }
   else if ( nTotB == 0) {
      effB = 0;
      effS = nSelS/nTotS;
      fLogger << kWARNING << "<ComputeEstimator> zero number of background events" << Endl;
   }
   else {
      effS = nSelS/nTotS;
      effB = nSelB/nTotB;
   }
}
//_______________________________________________________________________
// build, for every input variable, signal and background histograms from
// the training tree, a smoothed copy of each, and spline-based PDFs used
// by the "EffPDF" efficiency computation
void TMVA::MethodCuts::CreateVariablePDFs( void )
{
fVarHistS = new vector<TH1*> ( GetNvar() );
fVarHistB = new vector<TH1*> ( GetNvar() );
fVarHistS_smooth = new vector<TH1*> ( GetNvar() );
fVarHistB_smooth = new vector<TH1*> ( GetNvar() );
fVarPdfS = new vector<TMVA::PDF*>( GetNvar() );
fVarPdfB = new vector<TMVA::PDF*>( GetNvar() );
// number of smoothing passes applied to the copies (0 = no smoothing)
Int_t nsmooth = 0;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
// --- signal histogram: fill via TTree::Draw into a temporary "h"
TString histTitle = (*fInputVars)[ivar] + " signal training";
TString histName = (*fInputVars)[ivar] + "_sig";
TString drawOpt = (*fInputVars)[ivar] + ">>h(";
drawOpt += fNbins;
drawOpt += ")";
Data().GetTrainingTree()->Draw( drawOpt, "type==1", "goff" );
(*fVarHistS)[ivar] = (TH1F*)gDirectory->Get("h");
(*fVarHistS)[ivar]->SetName(histName);
(*fVarHistS)[ivar]->SetTitle(histTitle);
// smoothed copy of the signal histogram
(*fVarHistS_smooth)[ivar] = (TH1F*)(*fVarHistS)[ivar]->Clone();
histTitle = (*fInputVars)[ivar] + " signal training smoothed ";
histTitle += nsmooth;
histTitle +=" times";
histName = (*fInputVars)[ivar] + "_sig_smooth";
(*fVarHistS_smooth)[ivar]->SetName(histName);
(*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
(*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
// --- background histogram, same procedure with selection "type==0"
histTitle = (*fInputVars)[ivar] + " background training";
histName = (*fInputVars)[ivar] + "_bgd";
drawOpt = (*fInputVars)[ivar] + ">>h(";
drawOpt += fNbins;
drawOpt += ")";
Data().GetTrainingTree()->Draw( drawOpt, "type==0", "goff" );
(*fVarHistB)[ivar] = (TH1F*)gDirectory->Get("h");
(*fVarHistB)[ivar]->SetName(histName);
(*fVarHistB)[ivar]->SetTitle(histTitle);
// smoothed copy of the background histogram
(*fVarHistB_smooth)[ivar] = (TH1F*)(*fVarHistB)[ivar]->Clone();
histTitle = (*fInputVars)[ivar]+" background training smoothed ";
histTitle += nsmooth;
histTitle +=" times";
histName = (*fInputVars)[ivar]+"_bgd_smooth";
(*fVarHistB_smooth)[ivar]->SetName(histName);
(*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
(*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
// spline-2 PDFs built from the smoothed histograms
(*fVarPdfS)[ivar] = new TMVA::PDF( (*fVarHistS_smooth)[ivar], TMVA::PDF::kSpline2 );
(*fVarPdfB)[ivar] = new TMVA::PDF( (*fVarHistB_smooth)[ivar], TMVA::PDF::kSpline2 );
}
}
//_______________________________________________________________________
Bool_t TMVA::MethodCuts::SanityChecks( void )
{
   // verify that the branch names of the training tree (excluding the
   // bookkeeping branches) match the declared input variables, in order;
   // returns kFALSE and logs a warning on any mismatch
   Bool_t isOK = kTRUE;

   TObjArrayIter branchIter( Data().GetTrainingTree()->GetListOfBranches(), kIterForward );
   TBranch* branch = 0;
   Int_t ivar = -1;
   while ((branch = (TBranch*)branchIter.Next()) != 0) {
      TString branchName = branch->GetName();
      // skip the type/weight bookkeeping branches
      if (branchName == "type" || branchName == "weight" || branchName == "boostweight") continue;
      ivar++;
      if ((*fInputVars)[ivar] != branchName) {
         fLogger << kWARNING << "<SanityChecks> mismatch in variables" << Endl;
         isOK = kFALSE;
      }
   }
   return isOK;
}
//_______________________________________________________________________
// write the optimised cuts to the weight stream; the format must stay in
// sync with ReadWeightsFromStream, which parses it token by token
void TMVA::MethodCuts::WriteWeightsToStream( ostream & o ) const
{
// header line (three tokens, skipped as dummies by the reader)
o << "OptimisationMethod " << "nRandCuts " << "nbins:" << endl;
// NOTE(review): despite the "OptimisationMethod" header, the label written
// here is derived from fEffMethod (efficiency method), not fFitMethod —
// confirm whether this is intended before changing the format
o << ((fEffMethod == kUseEventSelection) ? "Fit-EventSelection" :
(fEffMethod == kUsePDFs) ? "Fit-PDF" : "Monte-Carlo") << " " ;
o << fNRandCuts << " ";
o << fNbins << endl;
// human-readable description lines (also skipped by the reader)
o << "Below are the optimised cuts for " << GetNvar() << " variables:" << endl;
o << "Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0]"
<< " ... cutMin[ivar=n-1] cutMax[ivar=n-1]" << endl;
// one line per signal-efficiency bin: bin centre, effB, then min/max per variable
for (Int_t ibin=0; ibin<fNbins; ibin++) {
o << setw(4) << ibin+1 << " "
<< setw(8)<< fEffBvsSLocal->GetBinCenter( ibin +1 ) << " "
<< setw(8)<< fEffBvsSLocal->GetBinContent( ibin +1 ) << " ";
for (Int_t ivar=0; ivar<GetNvar(); ivar++)
o <<setw(10)<< fCutMin[ivar][ibin] << " " << setw(10) << fCutMax[ivar][ibin] << " ";
o << endl;
}
}
//_______________________________________________________________________
// read the optimised cuts back from the weight stream; the token order
// must match exactly what WriteWeightsToStream produced
void TMVA::MethodCuts::ReadWeightsFromStream( istream& istr )
{
TString dummy;
UInt_t dummyInt;
// skip the three header tokens
istr >> dummy >> dummy >> dummy;
// method label (ignored), then nRandCuts and nbins
istr >> dummy >> fNRandCuts >> fNbins;
// "Below are the optimised cuts for <N> variables:" — the 7th token is N
istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >> dummy ;
if (dummyInt != Data().GetNVariables()) {
fLogger << kFATAL << "<ReadWeightsFromStream> fatal error: mismatch "
<< "in number of variables: " << dummyInt << " != " << Data().GetNVariables() << Endl;
}
SetNvar(dummyInt);
fLogger << kINFO << "Read cuts from "<< fNRandCuts << " MC events"
<< " in " << fNbins << " efficiency bins and for " << GetNvar() << " variables" << Endl;
// skip the remainder of the current line and the "Format: ..." line
char buffer[200];
istr.getline(buffer,200);
istr.getline(buffer,200);
Int_t tmpbin;
Float_t tmpeffS, tmpeffB;
for (Int_t ibin=0; ibin<fNbins; ibin++) {
istr >> tmpbin >> tmpeffS >> tmpeffB;
// the first/last bins define the covered signal-efficiency range
if (ibin == 0 ) fEffSMin = tmpeffS;
if (ibin == fNbins-1) fEffSMax = tmpeffS;
for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
istr >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
}
}
}
//_______________________________________________________________________
void TMVA::MethodCuts::WriteMonitoringHistosToFile( void ) const
{
   // write the monitoring histograms to the target file: the local
   // background-vs-signal efficiency histogram always, and the per-variable
   // histograms/PDFs only when the PDF efficiency method is in use
   fLogger << kINFO << "write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;

   BaseDir()->cd();
   fEffBvsSLocal->Write();

   if (fEffMethod != kUsePDFs) return;

   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      (*fVarHistS)[ivar]->Write();
      (*fVarHistB)[ivar]->Write();
      (*fVarHistS_smooth)[ivar]->Write();
      (*fVarHistB_smooth)[ivar]->Write();
      (*fVarPdfS)[ivar]->GetPDFHist()->Write();
      (*fVarPdfB)[ivar]->GetPDFHist()->Write();
   }
}
//_______________________________________________________________________
// return the signal efficiency at the background efficiency requested in
// theString (format "Efficiency:<effB>"), computed on the TRAINING sample;
// the efficiency curve is built once and cached in fTrainEffBvsS
Double_t TMVA::MethodCuts::GetTrainingEfficiency( TString theString)
{
TList* list = TMVA::Tools::ParseFormatLine( theString );
// NOTE(review): 'list' is never deleted on any path — presumably the
// caller does not own it, but confirm ParseFormatLine's ownership contract
if (list->GetSize() != 2) {
fLogger << kWARNING << "<GetTrainingEfficiency> wrong number of arguments"
<< " in string: " << theString
<< " | required format, e.g., Efficiency:0.05" << Endl;
return -1;
}
// the requested background efficiency working point
Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
fLogger << kVERBOSE << "<GetTrainingEfficiency> compute eff(S) at eff(B) = "
<< effBref << Endl;
// on the first call, build the effB-vs-effS curve from the training sample
Bool_t firstPass = (NULL == fTrainEffBvsS || NULL == fTrainRejBvsS);
if (firstPass) {
if (fBinaryTreeS != 0) delete fBinaryTreeS;
if (fBinaryTreeB != 0) delete fBinaryTreeB;
fBinaryTreeS = new TMVA::BinarySearchTree();
fBinaryTreeS->Fill( Data(), Data().GetTrainingTree(), 1 );
fBinaryTreeB = new TMVA::BinarySearchTree();
fBinaryTreeB->Fill( Data(), Data().GetTrainingTree(), 0 );
if (NULL != fTrainEffBvsS) delete fTrainEffBvsS;
if (NULL != fTrainRejBvsS) delete fTrainRejBvsS;
fTrainEffBvsS = new TH1F( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
fTrainRejBvsS = new TH1F( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
// evaluate the optimised cuts of every efficiency bin on the training events
Double_t* tmpCutMin = new Double_t[GetNvar()];
Double_t* tmpCutMax = new Double_t[GetNvar()];
for (Int_t bini=1; bini<=fNbins; bini++) {
for (Int_t ivar=0; ivar <GetNvar(); ivar++){
tmpCutMin[ivar] = fCutMin[ivar][bini-1];
tmpCutMax[ivar] = fCutMax[ivar][bini-1];
}
Double_t effS, effB;
this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
fTrainEffBvsS->SetBinContent( bini, effB );
fTrainRejBvsS->SetBinContent( bini, 1.0-effB );
}
delete[] tmpCutMin;
delete[] tmpCutMax;
// spline interpolation of the efficiency curve
fGraphTrainEffBvsS = new TGraph( fTrainEffBvsS );
fSplTrainEffBvsS = new TMVA::TSpline1( "trainEffBvsS", fGraphTrainEffBvsS );
}
if (NULL == fSplTrainEffBvsS) return 0.0;
// scan the spline for the effS at which effB crosses the reference value
Double_t effS, effB, effS_ = 0, effB_ = 0;
Int_t nbins_ = 1000;
for (Int_t bini=1; bini<=nbins_; bini++) {
effS = (bini - 0.5)/Float_t(nbins_);
effB = fSplTrainEffBvsS->Eval( effS );
// sign change of (effB - effBref) brackets the crossing point
if ((effB - effBref)*(effB_ - effBref) < 0) break;
effS_ = effS;
effB_ = effB;
}
// linear interpolation between the two bracketing scan points
return 0.5*(effS + effS_);
}
//_______________________________________________________________________
// return the signal efficiency at the background efficiency requested in
// theString (format "Efficiency:<effB>"), computed on the TEST sample;
// the efficiency curve is built once and cached in fEffBvsS
Double_t TMVA::MethodCuts::GetEfficiency( TString theString, TTree* )
{
TList* list = TMVA::Tools::ParseFormatLine( theString );
// NOTE(review): 'list' is never deleted on any path — presumably the
// caller does not own it, but confirm ParseFormatLine's ownership contract
if (list->GetSize() != 2) {
fLogger << kWARNING << "<GetEfficiency> wrong number of arguments"
<< " in string: " << theString
<< " | required format, e.g., Efficiency:0.05" << Endl;
return -1;
}
// the requested background efficiency working point
Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
fLogger << kVERBOSE << "<GetEfficiency> compute eff(S) at eff(B) = " << effBref << Endl;
// on the first call, build the effB-vs-effS curve from the test sample
if ( fEffBvsS == NULL || fRejBvsS == NULL) {
if (fBinaryTreeS!=0) delete fBinaryTreeS;
if (fBinaryTreeB!=0) delete fBinaryTreeB;
fBinaryTreeS = new TMVA::BinarySearchTree();
fBinaryTreeS->Fill( Data(), Data().GetTestTree(), 1, GetPreprocessingMethod(), GetPreprocessingType() );
fBinaryTreeB = new TMVA::BinarySearchTree();
fBinaryTreeB->Fill( Data(), Data().GetTestTree(), 0, GetPreprocessingMethod(), GetPreprocessingType() );
if (NULL != fEffBvsS)delete fEffBvsS;
if (NULL != fRejBvsS)delete fRejBvsS;
fEffBvsS = new TH1F( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
fRejBvsS = new TH1F( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
// evaluate the optimised cuts of every efficiency bin on the test events
Double_t* tmpCutMin = new Double_t[GetNvar()];
Double_t* tmpCutMax = new Double_t[GetNvar()];
for (Int_t bini=1; bini<=fNbins; bini++) {
for (Int_t ivar=0; ivar <GetNvar(); ivar++){
tmpCutMin[ivar] = fCutMin[ivar][bini-1];
tmpCutMax[ivar] = fCutMax[ivar][bini-1];
}
Double_t effS, effB;
this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
fEffBvsS->SetBinContent( bini, effB );
fRejBvsS->SetBinContent( bini, 1.0-effB );
}
delete[] tmpCutMin;
delete[] tmpCutMax;
// spline interpolation of the efficiency curve
fGrapheffBvsS = new TGraph( fEffBvsS );
fSpleffBvsS = new TMVA::TSpline1( "effBvsS", fGrapheffBvsS );
}
if (NULL == fSpleffBvsS) return 0.0;
// scan the spline for the effS at which effB crosses the reference value
Double_t effS, effB, effS_ = 0, effB_ = 0;
Int_t nbins_ = 1000;
for (Int_t bini=1; bini<=nbins_; bini++) {
effS = (bini - 0.5)/Float_t(nbins_);
effB = fSpleffBvsS->Eval( effS );
// sign change of (effB - effBref) brackets the crossing point
if ((effB - effBref)*(effB_ - effBref) < 0) break;
effS_ = effS;
effB_ = effB;
}
// linear interpolation between the two bracketing scan points
return 0.5*(effS + effS_);
}
// ROOT page - Class index - Class Hierarchy - Top of the page
// This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.
// (auto-generated HTML footer residue — commented out, as bare text is not valid C++)