* *
**********************************************************************************/
//Begin_Html
/*
Multivariate optimisation of signal efficiency for given background
efficiency, applying rectangular minimum and maximum requirements.
<p>
<font size="-1">
Other optimisation criteria, such as maximising the signal significance-
squared, S^2/(S+B), with S and B being the signal and background yields,
correspond to a particular point in the optimised background rejection
versus signal efficiency curve. This working point requires the knowledge
of the expected yields, which is not the case in general. Note also that
for rare signals, Poissonian statistics should be used, which modifies
the significance criterion.
</font>
<p>
The rectangular cut of a volume in the variable space is performed using
a binary tree to sort the training events. This provides a significant
reduction in computing time (up to several orders of magnitudes, depending
on the complexity of the problem at hand).
<p>
Technically, optimisation is achieved in TMVA by two methods:
<ol>
<li>Monte Carlo generation using uniform priors for the lower cut value,
and the cut width, thrown within the variable ranges.
<li>A Genetic Algorithm (GA) searches for the optimal ("fittest") cut sample.
The GA is configurable by many external settings through the option
string. For difficult cases (such as many variables), some tuning
may be necessary to achieve satisfying results
</ol>
<p>
<font size="-1">
Attempts to use Minuit fits (Simplex ot Migrad) instead have not shown
superior results, and often failed due to convergence at local minima.
</font>
<p>
The tests we have performed so far showed that in generic applications,
the GA is superior to MC sampling, and hence GA is the default method.
It is worthwhile to try both anyway.
*/
//End_Html
#include <stdio.h>
#include "time.h"
#include "Riostream.h"
#include "TH1F.h"
#include "TObjString.h"
#ifndef ROOT_TMVA_MethodCuts
#include "TMVA/MethodCuts.h"
#endif
#ifndef ROOT_TMVA_GeneticCuts
#include "TMVA/GeneticCuts.h"
#endif
#ifndef ROOT_TMVA_Tools
#include "TMVA/Tools.h"
#endif
#ifndef ROOT_TMVA_Timer
#include "TMVA/Timer.h"
#endif
ClassImp(TMVA::MethodCuts)
TMVA::MethodCuts* TMVA::MethodCuts::fgThisCuts = NULL;
TMVA::MethodCuts::MethodCuts( TString jobName, vector<TString>* theVariables,
TTree* theTree, TString theOption, TDirectory* theTargetDir )
: TMVA::MethodBase( jobName, theVariables, theTree, theOption, theTargetDir )
{
InitCuts();
TList* list = TMVA::Tools::ParseFormatLine( fOptions );
if (list->GetSize()<1) {
fOptions = "MC:10000:";
cout << "--- " << GetName() << ": problems with options string, using default: "
<< fOptions << endl;
list = TMVA::Tools::ParseFormatLine( fOptions );
}
TString s = ((TObjString*)list->At(0))->GetString();
s.ToUpper();
if (s.Contains( "MC" )) fFitMethod = kUseMonteCarlo;
else if (s.Contains( "GA" )) fFitMethod = kUseGeneticAlgorithm;
else {
cout << "--- " << GetName() << ": unknown entry in field 0 of option string: "
<< s << " ==> abort" << endl;
exit(1);
}
if (list->GetSize() > 1) {
s = ((TObjString*)list->At(1))->GetString();
s.ToUpper();
if (s.Contains( "EFFSEL" )) fEffMethod = kUseEventSelection;
else if (s.Contains( "EFFPDF" )) fEffMethod = kUsePDFs;
else fEffMethod = kUseEventSelection;
}
cout << "--- " << GetName() << ": interpret options string: '" << fOptions << "'" << endl;
printf( "--- %s: --> use optimization method: '%s'\n",
GetName(), (fFitMethod == kUseMonteCarlo) ? "Monte Carlo" : "Genetic Algorithm" );
printf( "--- %s: --> use efficiency computation method: '%s'\n",
GetName(), (fEffMethod == kUseEventSelection) ? "Event Selection" : "PDF" );
switch (fFitMethod) {
case kUseMonteCarlo:
if (list->GetSize() > 2) {
s = ((TObjString*)list->At(2))->GetString();
fNRandCuts = atoi( s );
if (fNRandCuts <= 1) {
cout << "--- " << GetName() << ": invalid number of MC events: " << fNRandCuts
<< " in field 2 of option string: " << s << " ==> abort" << endl;
exit(1);
}
}
cout << "--- " << GetName() << ": generate " << fNRandCuts << " random cut samples"
<< endl;
if (list->GetSize() > 3) {
s = ((TObjString*)list->At(3))->GetString();
s.ToUpper();
if (s.Contains( "ALL" )) {
FitParameters theFitP = kNotEnforced;
if (s.Contains( "FMAX" )) theFitP = kForceMax;
else if (s.Contains( "FMIN" )) theFitP = kForceMin;
else if (s.Contains( "FSMART" )) theFitP = kForceSmart;
else if (s.Contains( "FVERYSMART" )) theFitP = kForceVerySmart;
else {
cout << "--- " << GetName() << ": unknown fit parameter option "
<< " in field 2 of option string: " << s << " ==> abort" << endl;
exit(1);
}
for (Int_t ivar=0; ivar<fNvar; ivar++) (*fFitParams)[ivar] = theFitP;
if (theFitP != kNotEnforced)
cout << "--- " << GetName() << ": use 'smart' cuts" << endl;
}
else {
for (Int_t ivar=0; ivar<fNvar; ivar++) {
if (list->GetSize() >= 3+ivar) {
s = ((TObjString*)list->At(2+ivar))->GetString();
s.ToUpper();
FitParameters theFitP = kNotEnforced;
if (s == "" || s == "NOTENFORCED") theFitP = kNotEnforced;
else if (s.Contains( "FMAX" )) theFitP = kForceMax;
else if (s.Contains( "FMIN" )) theFitP = kForceMin;
else if (s.Contains( "FSMART" )) theFitP = kForceSmart;
else if (s.Contains( "FVERYSMART" )) theFitP = kForceVerySmart;
else {
cout << "--- " << GetName() << ": unknown fit parameter option "
<< " in field " << ivar+3 << " (var: " << ivar
<< " of option string: " << s << " ==> abort" << endl;
exit(1);
}
(*fFitParams)[ivar] = theFitP;
if (theFitP != kNotEnforced)
cout << "--- " << GetName() << ": use 'smart' cuts for variable: "
<< "'" << (*fInputVars)[ivar] << "'" << endl;
}
}
}
}
break;
case kUseGeneticAlgorithm:
if (list->GetSize() > 2) {
s = ((TObjString*)list->At(2))->GetString(); fGa_nsteps = atoi( s );
if (list->GetSize() > 3) {
s = ((TObjString*)list->At(3))->GetString(); fGa_preCalc = atoi( s );
if (list->GetSize() > 4) {
s = ((TObjString*)list->At(4))->GetString(); fGa_SC_steps = atoi( s );
if (list->GetSize() > 5) {
s = ((TObjString*)list->At(5))->GetString(); fGa_SC_offsteps = atoi( s );
if (list->GetSize() > 6) {
s = ((TObjString*)list->At(6))->GetString(); fGa_SC_factor = atof( s );
}
}
}
}
}
break;
default:
cout << "--- " << GetName() << ": Error: unknown method: " << fFitMethod
<< " ==> abort" << endl;
exit(1);
}
if (fFitMethod == kUseMonteCarlo)
printf( "--- %s: --> number of MC events to be generated: %i\n", GetName(), fNRandCuts );
for (Int_t ivar=0; ivar<fNvar; ivar++) {
TString theFitOption = ( ((*fFitParams)[ivar] == kNotEnforced) ? "NotEnforced" :
((*fFitParams)[ivar] == kForceMin ) ? "ForceMin" :
((*fFitParams)[ivar] == kForceMax ) ? "ForceMax" :
((*fFitParams)[ivar] == kForceSmart ) ? "ForceSmart" :
((*fFitParams)[ivar] == kForceVerySmart ) ? "ForceVerySmart" : "other" );
printf( "--- %s: --> option for variable: %s: '%s' (#: %i)\n",
GetName(), (const char*)(*fInputVars)[ivar], (const char*)theFitOption,
(Int_t)(*fFitParams)[ivar] );
}
}
TMVA::MethodCuts::MethodCuts( vector<TString> *theVariables,
TString theWeightFile,
TDirectory* theTargetDir )
: TMVA::MethodBase( theVariables, theWeightFile, theTargetDir )
{
InitCuts();
}
void TMVA::MethodCuts::InitCuts( void )
{
fMethodName = "Cuts";
fMethod = TMVA::Types::Cuts;
fTestvar = fTestvarPrefix+GetMethodName();
fConstrainType = kConstrainEffS;
fVarHistS = fVarHistB = 0;
fVarHistS_smooth = fVarHistB_smooth = 0;
fVarPdfS = fVarPdfB = 0;
fFitParams = 0;
fEffBvsSLocal = 0;
fBinaryTreeS = fBinaryTreeB = 0;
fEffSMin = 0;
fEffSMax = 0;
fNRandCuts = 100000;
fGa_preCalc = 3;
fGa_SC_steps = 10;
fGa_SC_offsteps = 5;
fGa_SC_factor = 0.95;
fGa_nsteps = 30;
fgThisCuts = this;
fNpar = 2*fNvar;
fRangeSign = new vector<Int_t> ( fNvar );
fMeanS = new vector<Double_t>( fNvar );
fMeanB = new vector<Double_t>( fNvar );
fRmsS = new vector<Double_t>( fNvar );
fRmsB = new vector<Double_t>( fNvar );
fXmin = new vector<Double_t>( fNvar );
fXmax = new vector<Double_t>( fNvar );
fFitParams = new vector<FitParameters>( fNvar );
for (Int_t ivar=0; ivar<fNvar; ivar++) (*fFitParams)[ivar] = kNotEnforced;
fTrandom = new TRandom( 0 );
fFitMethod = kUseMonteCarlo;
fTestSignalEff = -1;
fCutMin = new Double_t*[fNvar];
fCutMax = new Double_t*[fNvar];
for (Int_t i=0;i<fNvar;i++) {
fCutMin[i] = new Double_t[fNbins];
fCutMax[i] = new Double_t[fNbins];
}
for (Int_t ivar=0; ivar<fNvar; ivar++) {
for (Int_t ibin=0; ibin<fNbins; ibin++) {
fCutMin[ivar][ibin] = 0;
fCutMax[ivar][ibin] = 0;
}
}
fTmpCutMin = new Double_t[fNvar];
fTmpCutMax = new Double_t[fNvar];
}
TMVA::MethodCuts::~MethodCuts( void )
{
if (Verbose()){
cout << "--- TMVA::MethodCuts: Destructor called " << endl;
}
delete fRangeSign;
delete fTrandom;
delete fMeanS;
delete fMeanB;
delete fRmsS;
delete fRmsB;
delete fXmin;
delete fXmax;
for (Int_t i=0;i<fNvar;i++) {
if (fCutMin[i] != NULL) delete [] fCutMin[i];
if (fCutMax[i] != NULL) delete [] fCutMax[i];
}
delete[] fCutMin;
delete[] fCutMax;
delete[] fTmpCutMin;
delete[] fTmpCutMax;
if (NULL != fBinaryTreeS) delete fBinaryTreeS;
if (NULL != fBinaryTreeB) delete fBinaryTreeB;
}
Double_t TMVA::MethodCuts::GetMvaValue( TMVA::Event *e )
{
if (fCutMin == NULL || fCutMax == NULL || fNbins == 0) {
cerr << "--- " << GetName() << "::Eval_Cuts: Fatal Error: fCutMin/Max have zero pointer. "
<< "Did you book Cuts ? ==> abort" << endl;
exit(1);
}
if (fTestSignalEff > 0) {
Int_t ibin = int((fTestSignalEff - fEffSMin)/(fEffSMax - fEffSMin)*Double_t(fNbins));
if (ibin < 0 ) ibin = 0;
if (ibin >= fNbins) ibin = fNbins - 1;
Bool_t passed = kTRUE;
for (size_t ivar=0; ivar<e->GetData().size(); ivar++) {
passed &= (e->GetData()[ivar] >= fCutMin[ivar][ibin]) && (e->GetData()[ivar] <= fCutMax[ivar][ibin]);
}
return passed ? 1. : 0. ;
}
else return 0;
}
void TMVA::MethodCuts::Train( void )
{
if (!SanityChecks()) {
cout << "--- " << GetName() << ": Error: Basic sanity checks failed ==> abort"
<< endl;
exit(1);
}
if (fEffMethod == kUsePDFs) CreateVariablePDFs();
fConstrainType = kConstrainEffS;
fBinaryTreeS = new TMVA::BinarySearchTree();
fBinaryTreeS->Fill( fTrainingTree, fInputVars, 1 );
fBinaryTreeB = new TMVA::BinarySearchTree();
fBinaryTreeB->Fill( fTrainingTree, fInputVars, 0 );
TObjArrayIter branchIter( fTrainingTree->GetListOfBranches(), kIterForward );
TBranch* branch = 0;
Int_t ivar = -1;
const int nBranches = ( fTrainingTree->GetListOfBranches() != 0 ?
fTrainingTree->GetListOfBranches()->GetSize() : 0 );
TString* branchName = new TString[nBranches];
Float_t* branchVar = new Float_t[nBranches];
Int_t theType;
vector<TH1F*> signalDist, bkgDist;
while ((branch = (TBranch*)branchIter.Next()) != 0) {
if ((TString)branch->GetName() == "type") {
fTrainingTree->SetBranchAddress( branch->GetName(), &theType );
}
else {
++ivar;
branchName[ivar] = branch->GetName();
fTrainingTree->SetBranchAddress( branchName[ivar], &branchVar[ivar] );
TMVA::Tools::ComputeStat( fTrainingTree, branchName[ivar],
(*fMeanS)[ivar], (*fMeanB)[ivar],
(*fRmsS)[ivar], (*fRmsB)[ivar],
(*fXmin)[ivar], (*fXmax)[ivar] );
TString name = Form( "sigDistVar%d",ivar );
signalDist.push_back( (TH1F*)TMVA::Tools::projNormTH1F( fTrainingTree, branchName[ivar], name, 50,
(*fXmin)[ivar], (*fXmax)[ivar],
"type==1" ) );
name = Form( "bkgDistVar%d",ivar );
bkgDist.push_back( (TH1F*)TMVA::Tools::projNormTH1F( fTrainingTree, branchName[ivar], name,50,
(*fXmin)[ivar],(*fXmax)[ivar],
"type==0" ) );
if ((*fInputVars)[ivar] != branchName[ivar]) {
cout << "Error in: " << GetName() << "::Train: mismatch in variables ==> abort: "
<< ivar << " " << (*fInputVars)[ivar] << " " << branchName[ivar]
<< endl;
exit(1);
}
}
}
delete[] branchName;
delete[] branchVar;
fConstrainType = kConstrainEffS;
Int_t ibin=0;
fEffBvsSLocal = new TH1F( fTestvar + "_effBvsSLocal",
TString(GetName()) + " efficiency of B vs S",
fNbins, 0.0, 1.0 );
for (ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );
if (fFitMethod == kUseMonteCarlo) {
Double_t * cutMin = new Double_t[fNvar];
Double_t * cutMax = new Double_t[fNvar];
cout << "--- " << GetName() << ": Generating " << fNRandCuts
<< " cycles (random cuts) ... patience please" << endl;
Int_t nBinsFilled=0, nBinsFilledAt=0;
TMVA::Timer timer( fNRandCuts, GetName() );
for (Int_t imc=0; imc<fNRandCuts; imc++) {
for (Int_t ivar=0; ivar<fNvar; ivar++) {
FitParameters fitParam = (*fFitParams)[ivar];
if (fitParam == kForceSmart) {
if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) fitParam = kForceMax;
else fitParam = kForceMin;
}
if (fitParam == kForceMin)
cutMin[ivar] = (*fXmin)[ivar];
else
cutMin[ivar] = fTrandom->Rndm()*((*fXmax)[ivar] - (*fXmin)[ivar]) + (*fXmin)[ivar];
if (fitParam == kForceMax)
cutMax[ivar] = (*fXmax)[ivar];
else
cutMax[ivar] = fTrandom->Rndm()*((*fXmax)[ivar] - cutMin[ivar] ) + cutMin[ivar];
if (fitParam == kForceVerySmart){
cutMin[ivar] = fTrandom->Rndm()*((*fXmax)[ivar] - (*fXmin)[ivar]) + (*fXmin)[ivar];
cutMax[ivar] = fTrandom->Rndm()*((*fXmax)[ivar] - cutMin[ivar] ) + cutMin[ivar];
}
if (cutMax[ivar] < cutMin[ivar]) {
cout << "--- " << GetName() << ": Error in ::Train: mismatch with cuts ==> abort"
<< endl;
exit(1);
}
}
Double_t effS = 0, effB = 0;
GetEffsfromSelection( &cutMin[0], &cutMax[0], effS, effB);
Int_t ibinS = (Int_t)(effS*Float_t(fNbins) + 1);
if (ibinS < 1 ) ibinS = 1;
if (ibinS > fNbins) ibinS = fNbins;
Double_t effBH = fEffBvsSLocal->GetBinContent( ibinS );
if (effBH < 0 || effBH > effB) {
fEffBvsSLocal->SetBinContent( ibinS, effB );
for (Int_t ivar=0; ivar<fNvar; ivar++) {
fCutMin[ivar][ibinS-1] = cutMin[ivar];
fCutMax[ivar][ibinS-1] = cutMax[ivar];
}
}
Int_t nout = 1000;
if ((Int_t)imc%nout == 0 || imc == fNRandCuts-1) {
Int_t nbinsF = 0, ibin_;
for (ibin_=0; ibin_<fNbins; ibin_++)
if (fEffBvsSLocal->GetBinContent( ibin_ +1 ) >= 0) nbinsF++;
if (nBinsFilled!=nbinsF) {
nBinsFilled = nbinsF;
nBinsFilledAt = imc;
}
timer.DrawProgressBar( imc );
if (imc == fNRandCuts-1 )
printf( "--- %s: fraction of efficiency bins filled: %3.1f\n",
GetName(), nbinsF/Float_t(fNbins) );
}
}
if (this->Verbose()){
cout << "--- TMVA::MethodCuts: fraction of filled eff. bins did not increase"
<< " anymore after "<< nBinsFilledAt << " cycles" << endl;
}
cout << "--- " << GetName() << ": elapsed time: " << timer.GetElapsedTime() << endl;
delete[] cutMin;
delete[] cutMax;
}
else if (fFitMethod == kUseGeneticAlgorithm) {
vector<LowHigh_t*> ranges;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
(*fRangeSign)[ivar] = +1;
ranges.push_back( new LowHigh_t( (*fXmin)[ivar], (*fXmax)[ivar] ) );
ranges.push_back( new LowHigh_t( 0, (*fXmax)[ivar] - (*fXmin)[ivar] ) );
}
TMVA::GeneticCuts *bestResultsStore = new TMVA::GeneticCuts( 0, ranges );
TMVA::GeneticCuts *bestResults = new TMVA::GeneticCuts( 0, ranges );
cout << "--- " << GetName() << ": GA: entree, please be patient ..." << endl;
TMVA::Timer timer1( fGa_preCalc*fNbins, GetName() );
for (Int_t preCalc = 0; preCalc < fGa_preCalc; preCalc++) {
for (Int_t ibin=1; ibin<=fNbins; ibin++) {
timer1.DrawProgressBar( ibin + preCalc*fNbins );
fEffRef = fEffBvsSLocal->GetBinCenter( ibin );
TMVA::GeneticCuts ga( ranges.size() * 10, ranges );
ga.GetGeneticPopulation().AddPopulation( &bestResults->GetGeneticPopulation() );
ga.CalculateFitness();
ga.GetGeneticPopulation().TrimPopulation();
do {
ga.Init();
ga.CalculateFitness();
ga.SpreadControl( fGa_SC_steps, fGa_SC_offsteps, fGa_SC_factor );
} while (!ga.HasConverged( Int_t(fGa_nsteps*0.67), 0.0001 ));
bestResultsStore->GetGeneticPopulation().GiveHint( ga.GetGeneticPopulation().GetGenes( 0 )->GetFactors() );
}
delete bestResults;
bestResults = bestResultsStore;
bestResultsStore = new TMVA::GeneticCuts( 0, ranges );
}
bestResults->Init();
cout << "--- " << GetName() << ": GA: start main course "
<< endl;
TMVA::Timer timer2( fNbins, GetName() );
Double_t * cutMin = new Double_t[fNvar];
Double_t * cutMax = new Double_t[fNvar];
vector<Double_t> par(2*fNvar);
for (ibin=1; ibin<=fNbins; ibin++) {
timer2.DrawProgressBar( ibin );
fEffRef = fEffBvsSLocal->GetBinCenter( ibin );
TMVA::GeneticCuts ga( ranges.size() * 10, ranges );
ga.SetSpread( 0.1 );
ga.GetGeneticPopulation().AddPopulation( &bestResults->GetGeneticPopulation() );
ga.CalculateFitness();
ga.GetGeneticPopulation().TrimPopulation();
do {
ga.Init();
ga.CalculateFitness();
ga.SpreadControl( fGa_SC_steps, fGa_SC_offsteps, fGa_SC_factor );
} while (!ga.HasConverged( fGa_nsteps, 0.00001 ));
Int_t n;
n = 0;
vector< Double_t >::iterator vec = ga.GetGeneticPopulation().GetGenes( 0 )->GetFactors().begin();
for (; vec < ga.GetGeneticPopulation().GetGenes( 0 )->GetFactors().end(); vec++ ) {
par[n] = (*vec);
n++;
}
Double_t effS = 0, effB = 0;
this->MatchParsToCuts( par, &cutMin[0], &cutMax[0] );
this->GetEffsfromSelection( &cutMin[0], &cutMax[0], effS, effB);
for (Int_t ivar=0; ivar<fNvar; ivar++) {
fCutMin[ivar][ibin-1] = cutMin[ivar];
fCutMax[ivar][ibin-1] = cutMax[ivar];
}
}
delete[] cutMin;
delete[] cutMax;
cout << "--- " << GetName() << ": GA: elapsed time: " << timer1.GetElapsedTime()
<< endl;
}
else {
cerr << "--- " << GetName() << ": Error: unknown minization method: "
<< fFitMethod << " ==> abort" << endl;
exit(1);
}
WriteWeightsToFile();
WriteHistosToFile();
delete fEffBvsSLocal;
if (fBinaryTreeS) delete fBinaryTreeS;
if (fBinaryTreeB) delete fBinaryTreeB;
}
Double_t TMVA::MethodCuts::ComputeEstimator( const std::vector<Double_t> & par )
{
Double_t effS = 0, effB = 0;
this->MatchParsToCuts( par, &fTmpCutMin[0], &fTmpCutMax[0] );
switch (fEffMethod) {
case kUsePDFs:
this->GetEffsfromPDFs( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB );
break;
case kUseEventSelection:
this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
break;
default:
this->GetEffsfromSelection( &fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
}
const Double_t epsilon = 1.0e-06;
Double_t eta;
if (fConstrainType == kConstrainEffS) {
if (TMath::Abs(effS - fEffRef) > 0.001) eta = TMath::Abs(effB) + TMath::Abs(effS - fEffRef)/epsilon;
else eta = TMath::Abs(effB);
}
else if (fConstrainType == kConstrainEffB) {
eta = ( pow( (effB - fEffRef)/epsilon, 1 ) +
pow( 1.0/((effS > 0) ? effS : epsilon), 2 ) );
}
else eta = 0;
return eta;
}
void TMVA::MethodCuts::MatchParsToCuts( const std::vector<Double_t> & par,
Double_t* cutMin, Double_t* cutMax )
{
for (Int_t ivar=0; ivar<fNvar; ivar++) {
Int_t ipar = 2*ivar;
cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? par[ipar] : par[ipar] - par[ipar+1];
cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? par[ipar] + par[ipar+1] : par[ipar];
}
}
void TMVA::MethodCuts::MatchCutsToPars( Double_t* par,
Double_t* cutMin, Double_t* cutMax )
{
for (Int_t ivar=0; ivar<fNvar; ivar++) {
Int_t ipar = 2*ivar;
par[ipar] = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
par[ipar+1] = cutMax[ivar] - cutMin[ivar];
}
}
void TMVA::MethodCuts::GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
Double_t& effS, Double_t& effB )
{
effS = 1.0;
effB = 1.0;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
}
}
void TMVA::MethodCuts::GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
Double_t& effS, Double_t& effB)
{
Float_t nTotS = 0, nTotB = 0;
Float_t nSelS = 0, nSelB = 0;
TMVA::Volume* volume = new TMVA::Volume( cutMin, cutMax, fNvar );
nSelS = fBinaryTreeS->SearchVolume( volume );
nSelB = fBinaryTreeB->SearchVolume( volume );
nTotS = Float_t(fBinaryTreeS->GetSumOfWeights());
nTotB = Float_t(fBinaryTreeB->GetSumOfWeights());
delete volume;
if (nTotS == 0 && nTotB == 0) {
cout << "--- " << GetName()
<< ": fatal error in::ComputeEstimator: zero total number of events:"
<< " nTotS, nTotB: " << nTotS << " " << nTotB << " ***"
<< endl;
exit(1);
}
if (nTotS == 0 ) {
effS = 0;
effB = nSelB/nTotB;
cout << "--- " << GetName()
<< ": Warning in ::ComputeEstimator: zero number of events signal Events:\n";
}
else if ( nTotB == 0) {
effB = 0;
effS = nSelS/nTotS;
cout << "--- " << GetName()
<< ": Warning in ::ComputeEstimator: zero number of events background Events:\n";
}
else {
effS = nSelS/nTotS;
effB = nSelB/nTotB;
}
}
void TMVA::MethodCuts::CreateVariablePDFs( void )
{
fVarHistS = new vector<TH1*> ( fNvar );
fVarHistB = new vector<TH1*> ( fNvar );
fVarHistS_smooth = new vector<TH1*> ( fNvar );
fVarHistB_smooth = new vector<TH1*> ( fNvar );
fVarPdfS = new vector<TMVA::PDF*>( fNvar );
fVarPdfB = new vector<TMVA::PDF*>( fNvar );
Int_t nsmooth = 0;
for (Int_t ivar=0; ivar<fNvar; ivar++) {
TString histTitle = (*fInputVars)[ivar] + " signal training";
TString histName = (*fInputVars)[ivar] + "_sig";
TString drawOpt = (*fInputVars)[ivar] + ">>h(";
drawOpt += fNbins;
drawOpt += ")";
fTrainingTree->Draw( drawOpt, "type==1", "goff" );
(*fVarHistS)[ivar] = (TH1F*)gDirectory->Get("h");
(*fVarHistS)[ivar]->SetName(histName);
(*fVarHistS)[ivar]->SetTitle(histTitle);
(*fVarHistS_smooth)[ivar] = (TH1F*)(*fVarHistS)[ivar]->Clone();
histTitle = (*fInputVars)[ivar] + " signal training smoothed ";
histTitle += nsmooth;
histTitle +=" times";
histName = (*fInputVars)[ivar] + "_sig_smooth";
(*fVarHistS_smooth)[ivar]->SetName(histName);
(*fVarHistS_smooth)[ivar]->SetTitle(histTitle);
(*fVarHistS_smooth)[ivar]->Smooth(nsmooth);
histTitle = (*fInputVars)[ivar] + " background training";
histName = (*fInputVars)[ivar] + "_bgd";
drawOpt = (*fInputVars)[ivar] + ">>h(";
drawOpt += fNbins;
drawOpt += ")";
fTrainingTree->Draw( drawOpt, "type==0", "goff" );
(*fVarHistB)[ivar] = (TH1F*)gDirectory->Get("h");
(*fVarHistB)[ivar]->SetName(histName);
(*fVarHistB)[ivar]->SetTitle(histTitle);
(*fVarHistB_smooth)[ivar] = (TH1F*)(*fVarHistB)[ivar]->Clone();
histTitle = (*fInputVars)[ivar]+" background training smoothed ";
histTitle += nsmooth;
histTitle +=" times";
histName = (*fInputVars)[ivar]+"_bgd_smooth";
(*fVarHistB_smooth)[ivar]->SetName(histName);
(*fVarHistB_smooth)[ivar]->SetTitle(histTitle);
(*fVarHistB_smooth)[ivar]->Smooth(nsmooth);
(*fVarPdfS)[ivar] = new TMVA::PDF( (*fVarHistS_smooth)[ivar], TMVA::PDF::kSpline2 );
(*fVarPdfB)[ivar] = new TMVA::PDF( (*fVarHistB_smooth)[ivar], TMVA::PDF::kSpline2 );
}
}
Bool_t TMVA::MethodCuts::SanityChecks( void )
{
Bool_t isOK = kTRUE;
TObjArrayIter branchIter( fTrainingTree->GetListOfBranches(), kIterForward );
TBranch* branch = 0;
Int_t ivar = -1;
while ((branch = (TBranch*)branchIter.Next()) != 0) {
TString branchName = branch->GetName();
if (branchName != "type") {
ivar++;
if ((*fInputVars)[ivar] != branchName) {
cout << "Error in: " << GetName() << "::SanityChecks: mismatch in variables ==> abort"
<< endl;
isOK = kFALSE;
}
}
}
return isOK;
}
void TMVA::MethodCuts::WriteWeightsToFile( void )
{
TString fname = GetWeightFileName();
cout << "--- " << GetName() << ": creating weight file: " << fname << endl;
ofstream fout( fname );
if (!fout.good( )) {
cout << "--- " << GetName() << ": Error in ::WriteWeightsToFile: "
<< "unable to open output weight file: " << fname << endl;
exit(1);
}
fout << this->GetMethodName() <<endl;
fout << "NVars= " << fNvar <<endl;
Int_t ivar;
for (ivar=0; ivar<fNvar; ivar++) {
TString var = (*fInputVars)[ivar];
fout << var << " " << GetXminNorm( var ) << " " << GetXmaxNorm( var )
<< endl;
}
fout << "OptimisationMethod " << "nRandCuts " << "nbins:" << endl;
fout << ((fEffMethod == kUseEventSelection) ? "Fit-EventSelection" :
(fEffMethod == kUsePDFs) ? "Fit-PDF" : "Monte-Carlo") << " " ;
fout << fNRandCuts << " ";
fout << fNbins << endl;
fout << "the optimised cuts for " << fNvar << " variables" << endl;
fout << "format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0]"
<< " ... cutMin[ivar=n-1] cutMax[ivar=n-1]" << endl;
Int_t ibin;
for (ibin=0; ibin<fNbins; ibin++) {
fout << setw(4) << ibin+1 << " "
<< setw(8)<< fEffBvsSLocal->GetBinCenter( ibin +1 ) << " "
<< setw(8)<< fEffBvsSLocal->GetBinContent( ibin +1 ) << " ";
for (ivar=0; ivar<fNvar; ivar++)
fout <<setw(10)<< fCutMin[ivar][ibin] << " " << setw(10) << fCutMax[ivar][ibin] << " ";
fout << endl;
}
}
void TMVA::MethodCuts::ReadWeightsFromFile( void )
{
TString fname = GetWeightFileName();
cout << "--- " << GetName() << ": reading weight file: " << fname << endl;
ifstream fin( fname );
if (!fin.good( )) {
cout << "--- " << GetName() << ": Error in ::ReadWeightsFromFile: "
<< "unable to open input file: " << fname << endl;
exit(1);
}
TString var, dummy;
Double_t xmin, xmax;
fin >> dummy;
this->SetMethodName(dummy);
fin >> dummy >> fNvar;
Int_t ivar;
for (ivar=0; ivar<fNvar; ivar++) {
fin >> var >> xmin >> xmax;
if (var != (*fInputVars)[ivar]) {
cout << "--- " << GetName() << ": Error while reading weight file; "
<< "unknown variable: " << var << " at position: " << ivar << ". "
<< "Expected variable: " << (*fInputVars)[ivar] << " ==> abort"
<< endl;
exit(1);
}
this->SetXminNorm( ivar, xmin );
this->SetXmaxNorm( ivar, xmax );
}
fin >> dummy >> dummy >> dummy;
fin >> dummy >> fNRandCuts >> fNbins;
cout << "--- " << GetName() << ": Read cuts from "<< fNRandCuts << " MC events"
<< " in " << fNbins << " efficiency bins " << endl;
fin >> dummy >> dummy >> dummy >> dummy >>fNvar>>dummy ;
char buffer[200];
fin.getline(buffer,200);
fin.getline(buffer,200);
Int_t ibin;
Int_t tmpbin;
Float_t tmpeffS, tempeffB;
for (ibin=0; ibin<fNbins; ibin++) {
fin >> tmpbin >> tmpeffS >> tempeffB;
if (ibin == 0 ) fEffSMin = tmpeffS;
if (ibin == fNbins-1) fEffSMax = tmpeffS;
for (ivar=0; ivar<fNvar; ivar++) {
fin >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
}
}
}
void TMVA::MethodCuts::WriteHistosToFile( void )
{
cout << "--- " << GetName() << ": write " << GetName()
<< " special histos to file: " << fBaseDir->GetPath() << endl;
fEffBvsSLocal->Write();
if (fEffMethod == kUsePDFs) {
gDirectory->GetListOfKeys()->Print();
fBaseDir->mkdir(GetName()+GetMethodName())->cd();
for (Int_t ivar=0; ivar<fNvar; ivar++) {
(*fVarHistS)[ivar]->Write();
(*fVarHistB)[ivar]->Write();
(*fVarHistS_smooth)[ivar]->Write();
(*fVarHistB_smooth)[ivar]->Write();
(*fVarPdfS)[ivar]->GetPDFHist()->Write();
(*fVarPdfB)[ivar]->GetPDFHist()->Write();
}
}
}
void TMVA::MethodCuts::TestInitLocal( TTree *theTree )
{
cout << "--- " << GetName() << ": called TestInitLocal " <<endl;
fBinaryTreeS = new TMVA::BinarySearchTree();
fBinaryTreeS->Fill( theTree, fInputVars, 1 );
fBinaryTreeB = new TMVA::BinarySearchTree();
fBinaryTreeB->Fill( theTree, fInputVars, 0 );
}
Double_t TMVA::MethodCuts::GetEfficiency( TString theString, TTree * )
{
TList* list = TMVA::Tools::ParseFormatLine( theString );
if (list->GetSize() != 2) {
cout << "--- " << GetName() << ": Error in::GetEfficiency: wrong number of arguments"
<< " in string: " << theString
<< " | required format, e.g., Efficiency:0.05" << endl;
return -1;
}
Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
if (Verbose())
cout << "--- " << GetName() << "::GetEfficiency <verbose>: compute eff(S) at eff(B) = "
<< effBref << endl;
if ( fEffBvsS == NULL || fRejBvsS == NULL) {
if (NULL != fEffBvsS)delete fEffBvsS;
if (NULL != fRejBvsS)delete fRejBvsS;
fEffBvsS = new TH1F( fTestvar + "_effBvsS", fTestvar + "", fNbins, 0, 1 );
fRejBvsS = new TH1F( fTestvar + "_rejBvsS", fTestvar + "", fNbins, 0, 1 );
Double_t* tmpCutMin = new Double_t[fNvar];
Double_t* tmpCutMax = new Double_t[fNvar];
for (Int_t bini=1; bini<=fNbins; bini++) {
for (Int_t ivar=0; ivar <fNvar; ivar++){
tmpCutMin[ivar] = fCutMin[ivar][bini-1];
tmpCutMax[ivar] = fCutMax[ivar][bini-1];
}
Double_t effS, effB;
this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB);
fEffBvsS->SetBinContent( bini, effB );
fRejBvsS->SetBinContent( bini, 1.0-effB );
}
delete[] tmpCutMin;
delete[] tmpCutMax;
fGrapheffBvsS = new TGraph( fEffBvsS );
fSpleffBvsS = new TMVA::TSpline1( "effBvsS", fGrapheffBvsS );
}
if (NULL == fSpleffBvsS) return 0.0;
Double_t effS, effB, effS_ = 0, effB_ = 0;
Int_t nbins_ = 1000;
for (Int_t bini=1; bini<=nbins_; bini++) {
effS = (bini - 0.5)/Float_t(nbins_);
effB = fSpleffBvsS->Eval( effS );
if ((effB - effBref)*(effB_ - effBref) < 0) break;
effS_ = effS;
effB_ = effB;
}
return 0.5*(effS + effS_);
}
ROOT page - Class index - Class Hierarchy - Top of the page
This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.