/*
Virtual base class for all MVA methods.
MethodBase hosts several specific evaluation methods
The kind of MVA that provides optimal performance in an analysis strongly
depends on the particular application. The evaluation factory provides a
number of numerical benchmark results to directly assess the performance
of the MVA training on the independent test sample. These are:
<ul>
<li> The <i>signal efficiency</i> at three representative background efficiencies
(which is 1 − rejection).</li>
<li> The <i>significance</i> of an MVA estimator, defined by the difference
between the MVA mean values for signal and background, divided by the
quadratic sum of their root mean squares.</li>
<li> The <i>separation</i> of an MVA <i>x</i>, defined by the integral
½∫(S(x) − B(x))<sup>2</sup>/(S(x) + B(x))dx, where
S(x) and B(x) are the signal and background distributions, respectively.
The separation is zero for identical signal and background MVA shapes,
and it is one for disjunctive shapes.</li>
<li> <a name="mu_transform">
The average, ∫x μ(S(x))dx, of the signal μ-transform.
The μ-transform of an MVA denotes the transformation that yields
a uniform background distribution. In this way, the signal distributions
S(x) can be directly compared among the various MVAs. The stronger S(x)
peaks towards one, the better is the discrimination of the MVA. The
μ-transform is
<a href=http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html>documented here</a>.
</ul>
The MVA standard output also prints the linear correlation coefficients between
signal and background, which can be useful to eliminate variables that exhibit too
strong correlations.
*/
//End_Html
#include <string>
#include <sstream>
#include <fstream>
#include <stdlib.h>
#include "TROOT.h"
#include "TSystem.h"
#include "TObjString.h"
#include "TQObject.h"
#include "TSpline.h"
#include "TMatrix.h"
#include "TMath.h"
#include "TFile.h"
#include "TKey.h"
#ifndef ROOT_TMVA_MethodBase
#include "TMVA/MethodBase.h"
#endif
#ifndef ROOT_TMVA_Config
#include "TMVA/Config.h"
#endif
#ifndef ROOT_TMVA_Timer
#include "TMVA/Timer.h"
#endif
#ifndef ROOT_TMVA_RootFinder
#include "TMVA/RootFinder.h"
#endif
#ifndef ROOT_TMVA_PDF
#include "TMVA/PDF.h"
#endif
#ifndef ROOT_TMVA_VariableIdentityTransform
#include "TMVA/VariableIdentityTransform.h"
#endif
#ifndef ROOT_TMVA_VariableDecorrTransform
#include "TMVA/VariableDecorrTransform.h"
#endif
#ifndef ROOT_TMVA_VariablePCATransform
#include "TMVA/VariablePCATransform.h"
#endif
#ifndef ROOT_TMVA_Version
#include "TMVA/Version.h"
#endif
ClassImp(TMVA::MethodBase)
// Maximum number of iterations (presumably for the root finder used in the
// efficiency computation later in this file — confirm against usage)
const Int_t MethodBase_MaxIterations_ = 200;
// Switch: use spline interpolation for efficiency curves
const Bool_t Use_Splines_for_Eff_ = kTRUE;
// Coarse binning used for plotting histograms
const int NBIN_HIST_PLOT = 100;
// Fine binning used for high-resolution (efficiency) histograms
const int NBIN_HIST_HIGH = 10000;
// Standard constructor, used when booking a classifier for training.
// jobName/methodTitle identify the training job, theData supplies the
// variables and events, theOption is the configuration string, and
// theBaseDir (may be 0) is the ROOT directory for this method's output.
// Note: the initializer-list order must match the member declaration order.
TMVA::MethodBase::MethodBase( TString jobName,
TString methodTitle,
DataSet& theData,
TString theOption,
TDirectory* theBaseDir)
: IMethod(),
Configurable ( theOption ),
fData ( theData ),
fSignalReferenceCut ( 0.5 ),
fVariableTransformType ( Types::kSignal ),
fJobName ( jobName ),
fMethodName ( "MethodBase" ),
fMethodType ( Types::kMaxMethod ),
fMethodTitle ( methodTitle ),
fTestvar ( "" ),
fTestvarPrefix ( "MVA_" ),
fTMVATrainingVersion ( TMVA_VERSION_CODE ),
fROOTTrainingVersion ( ROOT_VERSION_CODE ),
fNvar ( theData.GetNVariables() ),
fBaseDir ( 0 ),
fMethodBaseDir ( theBaseDir ),
fWeightFile ( "" ),
fVarTransform ( 0 ),
fTrainEffS ( 0 ),
fTrainEffB ( 0 ),
fTrainEffBvsS ( 0 ),
fTrainRejBvsS ( 0 ),
fGraphTrainS ( 0 ),
fGraphTrainB ( 0 ),
fGraphTrainEffBvsS ( 0 ),
fSplTrainS ( 0 ),
fSplTrainB ( 0 ),
fSplTrainEffBvsS ( 0 ),
fMeanS ( -1 ),
fMeanB ( -1 ),
fRmsS ( -1 ),
fRmsB ( -1 ),
fXmin ( 1e30 ), // sentinel: larger than any expected MVA value
fXmax ( -1e30 ), // sentinel: smaller than any expected MVA value
fVariableTransform ( Types::kNone ),
fVerbose ( kFALSE ),
fHelp ( kFALSE ),
fIsMVAPdfs ( kFALSE ),
fTxtWeightsOnly ( kTRUE ),
fSplRefS ( 0 ),
fSplRefB ( 0 ),
fLogger ( this )
{
this->Init();
DeclareOptions();
// default initialisation of the weight-file directory (created if absent)
fFileDir = gConfig().GetIONames().fWeightFileDir;
gSystem->MakeDirectory( fFileDir );
}
// Constructor used when reading a classifier back from an existing weight
// file (application mode): no training information is available, so the
// version members start at 0 and are filled while parsing the weight file.
// Note: the initializer-list order must match the member declaration order.
TMVA::MethodBase::MethodBase( DataSet& theData,
TString weightFile,
TDirectory* theBaseDir )
: IMethod(),
Configurable(""),
fData ( theData ),
fSignalReferenceCut ( 0.5 ),
fVariableTransformType ( Types::kSignal ),
fJobName ( "" ),
fMethodName ( "MethodBase" ),
fMethodType ( Types::kMaxMethod ),
fMethodTitle ( "" ),
fTestvar (""),
fTestvarPrefix ("MVA_"),
fTMVATrainingVersion ( 0 ), // unknown until read from the weight file
fROOTTrainingVersion ( 0 ), // unknown until read from the weight file
fNvar ( theData.GetNVariables() ),
fBaseDir ( theBaseDir ),
fMethodBaseDir ( 0 ),
fWeightFile ( weightFile ),
fVarTransform ( 0 ),
fTrainEffS ( 0 ),
fTrainEffB ( 0 ),
fTrainEffBvsS ( 0 ),
fTrainRejBvsS ( 0 ),
fGraphTrainS ( 0 ),
fGraphTrainB ( 0 ),
fGraphTrainEffBvsS ( 0 ),
fSplTrainS ( 0 ),
fSplTrainB ( 0 ),
fSplTrainEffBvsS ( 0 ),
fMeanS ( -1 ),
fMeanB ( -1 ),
fRmsS ( -1 ),
fRmsB ( -1 ),
fXmin ( 1e30 ), // sentinel: larger than any expected MVA value
fXmax ( -1e30 ), // sentinel: smaller than any expected MVA value
fVariableTransform ( Types::kNone ),
fVerbose ( kFALSE ),
fHelp ( kFALSE ),
fIsMVAPdfs ( kFALSE ),
fTxtWeightsOnly ( kTRUE ),
fSplRefS ( 0 ),
fSplRefB ( 0 ),
fLogger ( this )
{
this->Init();
DeclareOptions();
}
TMVA::MethodBase::~MethodBase( void )
{
   // Destructor: dispose of heap objects owned by this method.
   if (fInputVars != 0) {
      fInputVars->clear();
      delete fInputVars;
   }
   // deleting a null pointer is a safe no-op, so no explicit checks needed
   delete fRanking;
   delete fMVAPdfS;
   delete fMVAPdfB;
}
// Default initialisation common to both constructors: reset all result
// histograms/PDF pointers and register the internal input-variable names.
void TMVA::MethodBase::Init()
{
fIsOK = kTRUE;
fNbins = NBIN_HIST_PLOT;
fNbinsH = NBIN_HIST_HIGH;
// monitoring histograms are created lazily during testing
fRanking = NULL;
fHistS_plotbin = NULL;
fHistB_plotbin = NULL;
fProbaS_plotbin = NULL;
fProbaB_plotbin = NULL;
fRarityS_plotbin = NULL;
fRarityB_plotbin = NULL;
fHistS_highbin = NULL;
fHistB_highbin = NULL;
fEffS = NULL;
fEffB = NULL;
fEffBvsS = NULL;
fRejBvsS = NULL;
finvBeffvsSeff = NULL;
fHistBhatS = NULL;
fHistBhatB = NULL;
fHistMuS = NULL;
fHistMuB = NULL;
fMVAPdfS = NULL;
fMVAPdfB = NULL;
// cache the internal names of all input variables (owned; freed in dtor)
fInputVars = new vector<TString>;
for (UInt_t ivar=0; ivar<Data().GetNVariables(); ivar++)
fInputVars->push_back(Data().GetInternalVarName(ivar));
ResetThisBase();
}
// Declare the configuration options common to all MVA methods and register
// the allowed values for the enumerated ones.
void TMVA::MethodBase::DeclareOptions()
{
   // fix: option description misspelled "deprecated" as "depreciated"
   DeclareOptionRef( fUseDecorr=kFALSE, "D", "use-decorrelated-variables flag (deprecated)" );
   DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" );

   // variable transformation applied before training/evaluation
   DeclareOptionRef( fVarTransformString="None", "VarTransform", "Variable transformation method" );
   AddPreDefVal( TString("None") );
   AddPreDefVal( TString("Decorrelate") );
   AddPreDefVal( TString("PCA") );
   DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
                     "Use signal or background events for var transform" );
   AddPreDefVal( TString("Signal") );
   AddPreDefVal( TString("Background") );

   // PDF creation for the classifier output (needed for probability/rarity)
   DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used to create MVA PDF" );
   DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for MVA PDF" );

   // verbosity control
   DeclareOptionRef( fVerbose, "V", "Verbose mode" );
   DeclareOptionRef( fVerbosityLevelString="Info", "VerboseLevel", "Verbosity level" );
   AddPreDefVal( TString("Debug") );
   AddPreDefVal( TString("Verbose") );
   AddPreDefVal( TString("Info") );
   AddPreDefVal( TString("Warning") );
   AddPreDefVal( TString("Error") );
   AddPreDefVal( TString("Fatal") );

   DeclareOptionRef( fHelp, "H", "Print classifier-specific help message" );
   DeclareOptionRef( fIsMVAPdfs=kFALSE, "CreateMVAPdfs", "Create PDFs for classifier outputs" );
   DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "if True, write all weights as text files" );
}
// Translate the parsed option strings into their enum/logger equivalents.
// Unknown values are fatal. Must be called after ParseOptions().
void TMVA::MethodBase::ProcessOptions()
{
// map the variable-transformation string onto the enum
if (fVarTransformString == "None") fVariableTransform = Types::kNone;
else if (fVarTransformString == "Decorrelate" ) fVariableTransform = Types::kDecorrelated;
else if (fVarTransformString == "PCA" ) fVariableTransform = Types::kPCA;
else {
fLogger << kFATAL << "<ProcessOptions> Variable transform '"
<< fVarTransformString << "' unknown." << Endl;
}
// backward compatibility: the deprecated "D" flag implies decorrelation
if ((fVariableTransform == Types::kNone) && fUseDecorr) fVariableTransform = Types::kDecorrelated;
if (fVariableTransformTypeString == "Signal") fVariableTransformType = Types::kSignal;
else if (fVariableTransformTypeString == "Background" ) fVariableTransformType = Types::kBackground;
else {
fLogger << kFATAL << "<ProcessOptions> Variable transformation type '"
<< fVariableTransformTypeString << "' unknown." << Endl;
}
// fetch the transformation object from the dataset unless already set
if (fVarTransform == 0) fVarTransform = Data().GetTransform( fVariableTransform );
// map the verbosity string onto the logger threshold
if (fVerbosityLevelString == "Debug" ) fLogger.SetMinType( kDEBUG );
else if (fVerbosityLevelString == "Verbose" ) fLogger.SetMinType( kVERBOSE );
else if (fVerbosityLevelString == "Info" ) fLogger.SetMinType( kINFO );
else if (fVerbosityLevelString == "Warning" ) fLogger.SetMinType( kWARNING );
else if (fVerbosityLevelString == "Error" ) fLogger.SetMinType( kERROR );
else if (fVerbosityLevelString == "Fatal" ) fLogger.SetMinType( kFATAL );
else {
fLogger << kFATAL << "<ProcessOptions> Verbosity level type '"
<< fVerbosityLevelString << "' unknown." << Endl;
}
// the "V" flag overrides the verbosity level
if (Verbose()) fLogger.SetMinType( kVERBOSE );
}
void TMVA::MethodBase::PrintHelpMessage() const
{
fLogger << kINFO << Endl;
fLogger << Tools::Color("bold")
<< "================================================================"
<< Tools::Color( "reset" )
<< Endl;
fLogger << Tools::Color("bold")
<< "H e l p f o r c l a s s i f i e r [ " << GetName() << " ] :"
<< Tools::Color( "reset" )
<< Endl;
GetHelpMessage();
fLogger << Endl;
fLogger << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
fLogger << Tools::Color("bold")
<< "================================================================"
<< Tools::Color( "reset" )
<< Endl;
fLogger << Endl;
}
// Driver for the training stage: train the concrete classifier, optionally
// build the MVA output PDFs, and persist weights, standalone class and
// monitoring histograms.
void TMVA::MethodBase::TrainMethod()
{
if (Help()) PrintHelpMessage();
// cd into the method's ROOT directory so objects are written there
BaseDir()->cd();
Train();
if (IsMVAPdfs()) CreateMVAPdfs();
WriteStateToFile();
MakeClass();
// cd back (Train/MakeClass may have changed the current directory)
BaseDir()->cd();
WriteMonitoringHistosToFile();
}
// Compute the posterior signal probability for a given MVA output value,
// using the signal/background PDFs and the a-priori signal fraction ap_sig.
// Returns 0 if the PDFs have not been created (warns), and -1 if the
// denominator vanishes (both PDF values are zero at mvaVal).
Double_t TMVA::MethodBase::GetProba( Double_t mvaVal, Double_t ap_sig )
{
   if (!fMVAPdfS || !fMVAPdfB) {
      // fix: message misspelled "Background"
      fLogger << kWARNING << "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
      return 0;
   }
   Double_t p_s = fMVAPdfS->GetVal( mvaVal );
   Double_t p_b = fMVAPdfB->GetVal( mvaVal );

   // Bayes: P(S|x) = p_s*ap / (p_s*ap + p_b*(1-ap))
   Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
   return (denom > 0) ? (p_s*ap_sig) / denom : -1;
}
// Compute the "rarity" of an MVA value: the integral of the reference PDF
// (signal or background, selected via reftype) from its lower edge up to
// mvaVal. Returns 0 (with a warning) if the required PDF does not exist.
Double_t TMVA::MethodBase::GetRarity( Double_t mvaVal, Types::ESBType reftype ) const
{
   if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
      // fix: message misspelled "Background"
      fLogger << kWARNING << "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
              << "select option \"CreateMVAPdfs\"" << Endl;
      return 0.0;
   }
   PDF* thePdf = reftype == Types::kSignal ? fMVAPdfS : fMVAPdfB;
   return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
}
void TMVA::MethodBase::CreateMVAPdfs()
{
fLogger << kINFO << "<CreateMVAPdfs> Using " << fNbinsMVAPdf << " bins and smooth "
<< fNsmoothMVAPdf << " times" << Endl;
vector<Double_t>* sigVec = new vector<Double_t>;
vector<Double_t>* bkgVec = new vector<Double_t>;
Double_t minVal=9999;
Double_t maxVal=-9999;
for (Int_t ievt=0; ievt<Data().GetNEvtTrain(); ievt++) {
ReadTrainingEvent(ievt);
Double_t theVal = this->GetMvaValue();
if (minVal>theVal) minVal = theVal;
if (maxVal<theVal) maxVal = theVal;
if (IsSignalEvent()) sigVec->push_back(theVal);
else bkgVec->push_back(theVal);
}
TH1* histMVAPdfS = new TH1F( GetMethodName() + "_tr_S", GetMethodName() + "_tr_S",
fNbinsMVAPdf, minVal, maxVal );
TH1* histMVAPdfB = new TH1F( GetMethodName() + "_tr_B", GetMethodName() + "_tr_B",
fNbinsMVAPdf, minVal, maxVal );
histMVAPdfS->Sumw2();
histMVAPdfB->Sumw2();
for (Int_t i=0; i<(Int_t)sigVec->size(); i++) histMVAPdfS->Fill( (*sigVec)[i] );
for (Int_t i=0; i<(Int_t)bkgVec->size(); i++) histMVAPdfB->Fill( (*bkgVec)[i] );
delete sigVec;
delete bkgVec;
Tools::NormHist( histMVAPdfS );
Tools::NormHist( histMVAPdfB );
histMVAPdfS->Write();
histMVAPdfB->Write();
fMVAPdfS = new PDF( histMVAPdfS, PDF::kSpline2, fNsmoothMVAPdf );
fMVAPdfB = new PDF( histMVAPdfB, PDF::kSpline2, fNsmoothMVAPdf );
fMVAPdfS->ValidatePDF( histMVAPdfS );
fMVAPdfB->ValidatePDF( histMVAPdfB );
fLogger << kINFO
<< Form( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
<< Endl;
delete histMVAPdfS;
delete histMVAPdfB;
}
void TMVA::MethodBase::WriteStateToStream( std::ostream& tf, Bool_t isClass ) const
{
TString prefix = "";
tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << endl << prefix << endl;
tf << prefix << "Method : " << GetMethodName() << "::" << GetMethodTitle() << endl;
tf.setf(std::ios::left);
tf << prefix << "TMVA Release : " << setw(10) << GetTrainingTMVAVersionString() << " [" << GetTrainingTMVAVersionCode() << "]" << endl;
tf << prefix << "ROOT Release : " << setw(10) << GetTrainingROOTVersionString() << " [" << GetTrainingROOTVersionCode() << "]" << endl;
tf << prefix << "Creator : " << gSystem->GetUserInfo()->fUser << endl;
tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << endl; delete d;
tf << prefix << "Host : " << gSystem->GetBuildNode() << endl;
tf << prefix << "Dir : " << gSystem->WorkingDirectory() << endl;
tf << prefix << "Training events: " << Data().GetNEvtTrain() << endl;
tf << prefix << endl;
tf << prefix << endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << endl << prefix << endl;
WriteOptionsToStream( tf, prefix );
tf << prefix << endl;
tf << prefix << endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << endl << prefix << endl;
GetVarTransform().WriteVarsToStream( tf, prefix );
tf << prefix << endl;
if (!isClass) {
tf << endl << "#MAT -*-*-*-*-*-*-*-*-* transformation data -*-*-*-*-*-*-*-*-*-" << endl;
GetVarTransform().WriteTransformationToStream( tf );
tf << endl;
if (IsMVAPdfs()) {
tf << endl << "#MVAPDFS -*-*-*-*-*-*-*-*-*-*-* MVA PDFS -*-*-*-*-*-*-*-*-*-*-*-" << endl;
tf << *fMVAPdfS << endl;
tf << *fMVAPdfB << endl;
tf << endl;
}
tf << endl << "#WGT -*-*-*-*-*-*-*-*-*-*-*-*- weights -*-*-*-*-*-*-*-*-*-*-*-*-" << endl << endl;
WriteWeightsToStream( tf );
}
}
// Write the classifier state into an open ROOT file: the MVA PDFs (when
// both exist) followed by the classifier weights.
void TMVA::MethodBase::WriteStateToStream( TFile& rf ) const
{
   rf.cd();
   const Bool_t havePdfs = (fMVAPdfS != 0) && (fMVAPdfB != 0);
   if (havePdfs) {
      fMVAPdfS->Write("MVA_PDF_Signal");
      fMVAPdfB->Write("MVA_PDF_Background");
   }
   WriteWeightsToStream( rf );
}
// Restore the classifier state from an open ROOT file: retrieve the MVA
// PDFs (may come back null if absent) and then read the weights.
void TMVA::MethodBase::ReadStateFromStream( TFile& rf )
{
   // keep retrieved histograms out of the current directory while reading
   const Bool_t addDirStatus = TH1::AddDirectoryStatus();
   TH1::AddDirectory( 0 );

   fMVAPdfS = (PDF*)rf.Get( "MVA_PDF_Signal" );
   fMVAPdfB = (PDF*)rf.Get( "MVA_PDF_Background" );

   TH1::AddDirectory( addDirStatus );

   ReadWeightsFromStream( rf );
}
// Write the classifier state to the text weight file, and additionally to a
// ROOT file when TxtWeightFilesOnly is disabled.
void TMVA::MethodBase::WriteStateToFile() const
{
   TString tfname( GetWeightFileName() );
   fLogger << kINFO << "Creating weight file in text format: "
           << Tools::Color("lightblue") << tfname << Tools::Color("reset") << Endl;
   ofstream tfile( tfname );
   if (!tfile.good()) {
      fLogger << kFATAL << "<WriteStateToFile> "
              << "Unable to open output weight file: " << tfname << Endl;
   }
   WriteStateToStream( tfile );
   tfile.close();

   // optional binary (ROOT) copy of the weight file
   if ( ! fTxtWeightsOnly ) {
      TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
      fLogger << kINFO << "Creating weight file in root format: "
              << Tools::Color("lightblue") << rfname << Tools::Color("reset") << Endl;
      TFile* rfile = TFile::Open( rfname, "RECREATE" );
      // fix: TFile::Open returns 0 on failure; guard before dereferencing
      if (rfile == 0) {
         fLogger << kFATAL << "<WriteStateToFile> "
                 << "Unable to open output weight file: " << rfname << Endl;
         return;
      }
      WriteStateToStream( *rfile );
      rfile->Close();
   }
}
// Read the classifier state back from the text weight file, and additionally
// from the ROOT file when TxtWeightFilesOnly is disabled.
void TMVA::MethodBase::ReadStateFromFile()
{
   TString tfname(GetWeightFileName());
   fLogger << kINFO << "Reading weight file: "
           << Tools::Color("lightblue") << tfname << Tools::Color("reset") << Endl;
   ifstream fin( tfname );
   if (!fin.good()) {
      fLogger << kFATAL << "<ReadStateFromFile> "
              << "Unable to open input weight file: " << tfname << Endl;
   }
   ReadStateFromStream(fin);
   fin.close();

   // optional binary (ROOT) part of the weight file
   if (!fTxtWeightsOnly) {
      TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
      fLogger << kINFO << "Reading root weight file: "
              << Tools::Color("lightblue") << rfname << Tools::Color("reset") << Endl;
      TFile* rfile = TFile::Open( rfname, "READ" );
      // fix: TFile::Open returns 0 on failure; guard before dereferencing
      if (rfile == 0) {
         fLogger << kFATAL << "<ReadStateFromFile> "
                 << "Unable to open input weight file: " << rfname << Endl;
         return;
      }
      ReadStateFromStream( *rfile );
      rfile->Close();
   }
}
bool TMVA::MethodBase::GetLine(std::istream& fin, char * buf )
{
fin.getline(buf,512);
TString line(buf);
if (line.BeginsWith("TMVA Release")) {
Ssiz_t start = line.First('[')+1;
Ssiz_t length = line.Index("]",start)-start;
TString code = line(start,length);
std::stringstream s(code.Data());
s >> fTMVATrainingVersion;
fLogger << kINFO << "Classifier was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
}
if (line.BeginsWith("ROOT Release")) {
Ssiz_t start = line.First('[')+1;
Ssiz_t length = line.Index("]",start)-start;
TString code = line(start,length);
std::stringstream s(code.Data());
s >> fROOTTrainingVersion;
fLogger << kINFO << "Classifier was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
}
return true;
}
// Parse the text weight file section by section (#GEN header, #OPT options,
// #VAR variables, #MAT transformation, optional #MVAPDFS, #WGT weights).
// The sections must appear in this order; each loop skips forward to the
// next section marker.
void TMVA::MethodBase::ReadStateFromStream( std::istream& fin )
{
char buf[512];
// skip ahead to the "Method : <name>::<title>" line
GetLine(fin,buf);
while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
TString ls(buf);
// extract "<name>::<title>" after the colon; idx2<0 means end of line
Int_t idx1 = ls.First(':')+2; Int_t idx2 = ls.Index(' ',idx1)-idx1; if (idx2<0) idx2=ls.Length();
TString fullname = ls(idx1,idx2);
idx1 = fullname.First(':');
Int_t idxtit = (idx1<0 ? fullname.Length() : idx1);
TString methodName = fullname(0, idxtit);
TString methodTitle;
Bool_t notit; // records whether a title was present (not used further here)
if (idx1<0) {
methodTitle=methodName;
notit=kTRUE;
}
else {
methodTitle=fullname(idxtit+2,fullname.Length()-1);
notit=kFALSE;
}
fLogger << kINFO << "Read method with name <" << methodName << "> and title <" << methodTitle << ">" << Endl;
this->SetMethodName( methodName );
this->SetMethodTitle( methodTitle );
// options section
GetLine(fin,buf);
while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
ReadOptionsFromStream(fin);
ParseOptions(Verbose());
// instantiate the variable transformation named in the options
fLogger << kINFO << "Create VariableTransformation \"" << fVarTransformString << "\"" << Endl;
if (fVarTransformString == "None" ) {
fVarTransform = new VariableIdentityTransform( Data().GetVariableInfos() );
}
else if (fVarTransformString == "Decorrelate" ) {
fVarTransform = new VariableDecorrTransform( Data().GetVariableInfos() );
}
else if (fVarTransformString == "PCA" ) {
fVarTransform = new VariablePCATransform( Data().GetVariableInfos() );
}
// variables section
fin.getline(buf,512);
while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
GetVarTransform().ReadVarsFromStream(fin);
ProcessOptions();
// transformation data (only written for non-trivial transformations)
if (GetVariableTransform() != Types::kNone) {
fin.getline(buf,512);
while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
GetVarTransform().ReadTransformationFromStream(fin);
}
// optional MVA PDFs (present only when CreateMVAPdfs was set at training)
if (IsMVAPdfs()) {
fin.getline(buf,512);
while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
if (fMVAPdfS!=0) { delete fMVAPdfS; fMVAPdfS = 0; }
if (fMVAPdfB!=0) { delete fMVAPdfB; fMVAPdfB = 0; }
fMVAPdfS = new PDF();
fMVAPdfB = new PDF();
// the PDFs must know the file format version they are reading
fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
fin >> *fMVAPdfS;
fin >> *fMVAPdfB;
}
// weights section (extra getline skips the blank line after the marker)
fin.getline(buf,512);
while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
fin.getline(buf,512);
ReadWeightsFromStream( fin );
}
// Return the ROOT directory for this method instance (named after the method
// title, inside the method-type directory), creating it on first use.
TDirectory* TMVA::MethodBase::BaseDir() const
{
   // explicitly set base directory takes precedence
   if (fBaseDir != 0) {
      return fBaseDir;
   }

   TDirectory* methodDir = MethodBaseDir();
   if (methodDir==0) {
      // fix: garbled fatal message ("return a NUL pointer")
      fLogger << kFATAL << "MethodBase::BaseDir() - MethodBaseDir() returned a NULL pointer!" << Endl;
   }

   // reuse an existing subdirectory if present
   TDirectory* dir = 0;
   TString defaultDir = GetMethodTitle();
   TObject* o = methodDir->FindObject(defaultDir);
   if (o!=0 && o->InheritsFrom("TDirectory")) dir = (TDirectory*)o;
   if (dir != 0) {
      return dir;
   }

   // otherwise create it
   TDirectory *sdir = methodDir->mkdir(defaultDir);
   return sdir;
}
// Return the ROOT directory for this method type ("Method_<name>" under the
// dataset's base directory), creating it on first use.
TDirectory* TMVA::MethodBase::MethodBaseDir() const
{
   if (fMethodBaseDir != 0) return fMethodBaseDir;

   const TString defaultDir = "Method_" + GetMethodName();

   // reuse an existing directory of that name if present
   TObject* found = Data().BaseRootDir()->FindObject(defaultDir);
   if (found != 0 && found->InheritsFrom("TDirectory")) {
      return (TDirectory*)found;
   }

   // otherwise create it
   return Data().BaseRootDir()->mkdir(defaultDir);
}
// Set an explicit weight-file path, overriding the default name composed
// by GetWeightFileName().
void TMVA::MethodBase::SetWeightFileName( TString theWeightFile)
{
fWeightFile = theWeightFile;
}
// Return the weight-file name: an explicitly set name (SetWeightFileName)
// takes precedence; otherwise it is composed as
// <weightFileDir>/<jobName>_<methodTitle>.<weightFileExtension>.txt
TString TMVA::MethodBase::GetWeightFileName() const
{
   if (fWeightFile!="") return fWeightFile;
   // (removed the always-empty "suffix" local that contributed nothing)
   return fFileDir + "/" + fJobName + "_" + GetMethodTitle() + "." + gConfig().GetIONames().fWeightFileExtension + ".txt";
}
// Verify that every input variable has a branch in the given tree
// (falls back to the training tree when theTree is 0).
Bool_t TMVA::MethodBase::CheckSanity( TTree* theTree )
{
   TTree* tree = (theTree != 0) ? theTree : Data().GetTrainingTree();
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      if (tree->FindBranch( GetInputVar(ivar) ) == 0) return kFALSE;
   }
   return kTRUE;
}
// Set the directory for weight files and create it on disk if absent.
void TMVA::MethodBase::SetWeightFileDir( TString fileDir )
{
fFileDir = fileDir;
gSystem->MakeDirectory( fFileDir );
}
// Normalise x using the stored range of the variable identified by name.
Double_t TMVA::MethodBase::Norm( TString var, Double_t x ) const
{
   const Double_t lo = GetXmin( var );
   const Double_t hi = GetXmax( var );
   return Tools::NormVariable( x, lo, hi );
}
// Normalise x using the stored range of the variable identified by index.
Double_t TMVA::MethodBase::Norm( Int_t ivar, Double_t x ) const
{
   const Double_t lo = GetXmin( ivar );
   const Double_t hi = GetXmax( ivar );
   return Tools::NormVariable( x, lo, hi );
}
// Initialise the testing stage: reset all result histograms/graphs/splines
// and sanity-check that the test tree exists and contains the classifier
// response branch (Cuts methods are exempt from the branch check).
void TMVA::MethodBase::TestInit(TTree* theTestTree)
{
   if (theTestTree == 0)
      theTestTree = Data().GetTestTree();

   // reset pointers filled during testing
   fHistS_plotbin = fHistB_plotbin = 0;
   fProbaS_plotbin = fProbaB_plotbin = 0;
   fRarityS_plotbin = fRarityB_plotbin = 0;
   fHistS_highbin = fHistB_highbin = 0;
   fEffS = fEffB = fEffBvsS = fRejBvsS = 0;
   fGraphS = fGraphB = 0;
   fCutOrientation = kPositive;
   fSplS = fSplB = 0;
   fSplRefS = fSplRefB = 0;

   if (0 == theTestTree) {
      fLogger << kFATAL << "<TestInit> Test tree has zero pointer " << Endl;
      fIsOK = kFALSE;
      // fix: must not fall through to the branch lookup below with a null
      // tree (would dereference it if the fatal message does not abort)
      return;
   }
   if ( 0 == theTestTree->FindBranch( GetTestvarName() ) && !(GetMethodName().Contains("Cuts"))) {
      fLogger << kFATAL << "<TestInit> Test variable " << GetTestvarName()
              << " not found in tree" << Endl;
      fIsOK = kFALSE;
   }
}
// Evaluate the classifier on every event of the test tree and append the
// response (and, if PDFs exist, the signal probability) as new branches.
void TMVA::MethodBase::PrepareEvaluationTree( TTree* testTree )
{
   if (0 == testTree) testTree = Data().GetTestTree();
   if (!CheckSanity( testTree )) {
      fLogger << kFATAL << "<PrepareEvaluationTree> Sanity check failed" << Endl;
   }

   // restore the trained state before evaluating
   this->ReadStateFromFile();

   Timer timer( testTree->GetEntries(), GetName(), kTRUE );
   Data().BaseRootDir()->cd();

   // new branches for the MVA response and (optionally) the probability
   Float_t myMVA = 0;
   Float_t myProba = 0;
   TBranch* newBranchMVA = testTree->Branch( GetTestvarName(), &myMVA, GetTestvarName() + "/F", 128000 );
   newBranchMVA->SetFile(testTree->GetDirectory()->GetFile());
   TBranch* newBranchProba = 0;
   if (IsMVAPdfs()) {
      newBranchProba = testTree->Branch( GetProbaName(), &myProba, GetProbaName() + "/F", 128000 );
      newBranchProba->SetFile(testTree->GetDirectory()->GetFile());
   }

   fLogger << kINFO << "Preparing evaluation tree... " << Endl;

   // progress-bar step, hoisted out of the loop (loop-invariant);
   // fix: guard against modulo==0 (UB in ievt%modulo) for trees < 100 events
   Int_t modulo = Int_t(testTree->GetEntries()/100);
   if (modulo <= 0) modulo = 1;

   for (Int_t ievt=0; ievt<testTree->GetEntries(); ievt++) {
      ReadTestEvent(ievt);
      newBranchMVA->SetAddress( &myMVA );
      myMVA = (Float_t)GetMvaValue();
      newBranchMVA->Fill();
      if (newBranchProba) {
         newBranchProba->SetAddress( &myProba );
         myProba = (Float_t)GetProba( myMVA, 0.5 ); // a-priori signal fraction 0.5
         newBranchProba->Fill();
      }
      if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
   }

   Data().BaseRootDir()->Write("",TObject::kOverwrite);
   fLogger << kINFO << "Elapsed time for evaluation of "
           << testTree->GetEntries() << " events: "
           << timer.GetElapsedTime() << " " << Endl;

   // detach the local variables from the tree before they go out of scope
   newBranchMVA ->ResetAddress();
   if (newBranchProba) newBranchProba->ResetAddress();
}
// Fill the test-stage monitoring histograms (response, probability, rarity,
// plus high-binned versions for efficiency curves) from the evaluated test
// tree, and build spline PDFs of the response shapes.
void TMVA::MethodBase::Test( TTree *theTestTree )
{
if (theTestTree == 0) theTestTree = Data().GetTestTree();
// basic statistics of the classifier response for signal and background
Double_t meanS, meanB, rmsS, rmsB, xmin, xmax;
TMVA::Tools::ComputeStat( theTestTree, GetTestvarName(), meanS, meanB, rmsS, rmsB, xmin, xmax );
// restrict the histogram range to mean +- nrms*rms to suppress outliers
Double_t nrms = 10;
xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax );
fMeanS = meanS; fMeanB = meanB;
fRmsS = rmsS; fRmsB = rmsB;
fXmin = xmin; fXmax = xmax;
// cut orientation: signal above background mean => "positive" cut direction
fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
// small offset so the maximum value falls inside the last bin
Double_t sxmax = fXmax+0.00001;
// delete histograms from a previous Test() call before recreating them
if (fHistS_plotbin ) { delete fHistS_plotbin; fHistS_plotbin = 0; }
if (fHistB_plotbin ) { delete fHistB_plotbin; fHistB_plotbin = 0; }
if (fProbaS_plotbin ) { delete fProbaS_plotbin; fProbaS_plotbin = 0; }
if (fProbaB_plotbin ) { delete fProbaB_plotbin; fProbaB_plotbin = 0; }
if (fRarityS_plotbin) { delete fRarityS_plotbin; fRarityS_plotbin = 0; }
if (fRarityB_plotbin) { delete fRarityB_plotbin; fRarityB_plotbin = 0; }
if (fHistS_highbin ) { delete fHistS_highbin; fHistS_highbin = 0; }
if (fHistB_highbin ) { delete fHistB_highbin; fHistB_highbin = 0; }
fHistS_plotbin = new TH1F( GetTestvarName() + "_S",GetTestvarName() + "_S", fNbins, fXmin, sxmax );
fHistB_plotbin = new TH1F( GetTestvarName() + "_B",GetTestvarName() + "_B", fNbins, fXmin, sxmax );
// probability/rarity histograms only exist when MVA PDFs were created
if (IsMVAPdfs()) {
fProbaS_plotbin = new TH1F( GetTestvarName() + "_Proba_S",GetTestvarName() + "_Proba_S", fNbins, 0.0, 1.0 );
fProbaB_plotbin = new TH1F( GetTestvarName() + "_Proba_B",GetTestvarName() + "_Proba_B", fNbins, 0.0, 1.0 );
fRarityS_plotbin = new TH1F( GetTestvarName() + "_Rarity_S",GetTestvarName() + "_Rarity_S", fNbins, 0.0, 1.0 );
fRarityB_plotbin = new TH1F( GetTestvarName() + "_Rarity_B",GetTestvarName() + "_Rarity_B", fNbins, 0.0, 1.0 );
}
fHistS_highbin = new TH1F( GetTestvarName() + "_S_high",GetTestvarName() + "_S_high", fNbinsH, fXmin, sxmax );
fHistB_highbin = new TH1F( GetTestvarName() + "_B_high",GetTestvarName() + "_B_high", fNbinsH, fXmin, sxmax );
fHistS_plotbin ->Sumw2();
fHistB_plotbin ->Sumw2();
if (IsMVAPdfs()) {
fProbaS_plotbin->Sumw2();
fProbaB_plotbin->Sumw2();
fRarityS_plotbin->Sumw2();
fRarityB_plotbin->Sumw2();
}
fHistS_highbin ->Sumw2();
fHistB_highbin ->Sumw2();
// attach local variables to the branches written by PrepareEvaluationTree
theTestTree->ResetBranchAddresses();
Float_t v, p;
Float_t w;
Int_t t;
TBranch* vbranch = theTestTree->GetBranch( GetTestvarName() );
TBranch* pbranch = 0;
if (IsMVAPdfs()) pbranch = theTestTree->GetBranch( GetProbaName() );
TBranch* wbranch = theTestTree->GetBranch( "weight" );
TBranch* tbranch = theTestTree->GetBranch( "type" );
if (!vbranch || !wbranch || !tbranch)
fLogger << kFATAL << "<Test> Mismatch in test tree: "
<< vbranch << " " << pbranch << " " << wbranch << " " << tbranch << Endl;
vbranch->SetAddress( &v );
if (pbranch) pbranch->SetAddress( &p );
wbranch->SetAddress( &w );
tbranch->SetAddress( &t );
fLogger << kINFO << "Loop over test events and fill histograms with classifier response ..." << Endl;
if (pbranch) fLogger << kINFO << "Also filling probability and rarity histograms (on request) ..." << Endl;
// fill weighted response histograms, split by true type (1 == signal)
for (Int_t ievt=0; ievt<Data().GetNEvtTest(); ievt++) {
theTestTree->GetEntry(ievt);
if (t == 1) {
fHistS_plotbin ->Fill( v, w );
if (pbranch) {
fProbaS_plotbin ->Fill( p, w );
fRarityS_plotbin->Fill( GetRarity( v ), w );
}
fHistS_highbin ->Fill( v, w );
}
else {
fHistB_plotbin ->Fill( v, w );
if (pbranch) {
fProbaB_plotbin->Fill( p, w );
fRarityB_plotbin->Fill( GetRarity( v ), w );
}
fHistB_highbin ->Fill( v, w );
}
}
// detach the stack variables again before they go out of scope
theTestTree->ResetBranchAddresses();
Tools::NormHist( fHistS_plotbin );
Tools::NormHist( fHistB_plotbin );
if (pbranch) {
Tools::NormHist( fProbaS_plotbin );
Tools::NormHist( fProbaB_plotbin );
Tools::NormHist( fRarityS_plotbin );
Tools::NormHist( fRarityB_plotbin );
}
Tools::NormHist( fHistS_highbin );
Tools::NormHist( fHistB_highbin );
// decouple the histograms from any ROOT file (owned by this object)
fHistS_plotbin ->SetDirectory(0);
fHistB_plotbin ->SetDirectory(0);
if (pbranch) {
fProbaS_plotbin->SetDirectory(0);
fProbaB_plotbin->SetDirectory(0);
fRarityS_plotbin->SetDirectory(0);
fRarityB_plotbin->SetDirectory(0);
}
fHistS_highbin ->SetDirectory(0);
fHistB_highbin ->SetDirectory(0);
// spline PDFs of the response shapes, used by later evaluation steps
fSplS = new PDF( fHistS_plotbin, PDF::kSpline2 );
fSplB = new PDF( fHistB_plotbin, PDF::kSpline2 );
}
Double_t TMVA::MethodBase::GetEfficiency( TString theString, TTree *theTree, Double_t& effSerr )
{
   // fill background efficiency (rejection) versus signal efficiency plots.
   //
   // returns the signal efficiency at the background efficiency indicated in
   // theString (format: "Efficiency:0.05"); if the string is empty or has
   // fewer than two tokens, the integral of (1 - effB) over effS is computed
   // and returned instead. effSerr receives a binomial error estimate on the
   // signal efficiency (meaningful only in the fixed-background-eff mode).

   // sanity check
   if (theTree == 0) fLogger << kFATAL << "<GetEfficiency> Object theTree has zero pointer" << Endl;

   // parse input string for required background efficiency
   TList* list = Tools::ParseFormatLine( theString );

   // sanity checks on the parsed arguments
   Bool_t  computeArea = kFALSE;
   Float_t effBref     = 0;
   if (!list || list->GetSize() < 2) computeArea = kTRUE; // integral mode
   else if (list->GetSize() > 2) {
      fLogger << kFATAL << "<GetEfficiency> Wrong number of arguments"
              << " in string: " << theString
              << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
      delete list; // fix: do not leak the parsed list on early return
      return -1;
   }
   else effBref = atof( ((TObjString*)list->At(1))->GetString() );
   delete list; // fix: list was never deleted (memory leak); deleting NULL is a no-op

   // sanity check
   if (fHistS_highbin->GetNbinsX() != fHistB_highbin->GetNbinsX() ||
       fHistS_plotbin->GetNbinsX() != fHistB_plotbin->GetNbinsX()) {
      fLogger << kWARNING << "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
      fIsOK = kFALSE;
      return -1.0;
   }

   // use the high-binned histograms' axis range for the efficiency scan
   Double_t xmin = fHistS_highbin->GetXaxis()->GetXmin();
   Double_t xmax = fHistS_highbin->GetXaxis()->GetXmax();

   // first pass? --> create and fill the efficiency histograms
   Bool_t firstPass = kFALSE;
   static Double_t nevtS; // signal event count; static so the error estimate
                          // below also works on calls after the first pass
   if (NULL == fEffS && NULL == fEffB) firstPass = kTRUE;

   if (firstPass) {

      fEffS = new TH1F( GetTestvarName() + "_effS", GetTestvarName() + " (signal)",     fNbinsH, xmin, xmax );
      fEffB = new TH1F( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );

      // cut orientation: +1 means "signal-like above the cut"
      Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;

      // this scan is unbinned: read type and MVA value directly from the tree
      Int_t   theType;
      Float_t theVal;

      theTree->ResetBranchAddresses();
      TBranch* brType = theTree->GetBranch("type");
      TBranch* brVal  = theTree->GetBranch(GetTestvarName());
      if (brVal == 0) {
         fLogger << kFATAL << "Could not find variable "
                 << GetTestvarName() << " in tree " << theTree->GetName() << Endl;
      }
      brType->SetAddress(&theType);
      brVal ->SetAddress(&theVal );

      nevtS = 0;
      for (Int_t ievt=0; ievt<theTree->GetEntries(); ievt++) {

         brType->GetEntry(ievt);
         brVal ->GetEntry(ievt);

         // select histogram depending on class
         TH1* theHist = (theType == 1) ? fEffS : fEffB;

         // count signal events for the efficiency error estimate below
         if (theType == 1) nevtS++;

         TAxis* axis   = theHist->GetXaxis();
         Int_t  maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
         if (sign > 0 && maxbin > fNbinsH) continue; // event outside range: does not count
         if (sign < 0 && maxbin < 1      ) continue; // event outside range: does not count
         if (sign > 0 && maxbin < 1      ) maxbin = 1;
         if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;

         // each event contributes to the cumulative efficiency of all cuts it passes
         if (sign > 0)
            for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin );
         else if (sign < 0)
            for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin );
         else
            fLogger << kFATAL << "<GetEfficiency> Mismatch in sign" << Endl;
      }
      theTree->ResetBranchAddresses();

      // renormalise to the maximum (cumulative distributions run from 1 to 0)
      fEffS->Scale( 1.0/(fEffS->GetMaximum() > 0 ? fEffS->GetMaximum() : 1) );
      fEffB->Scale( 1.0/(fEffB->GetMaximum() > 0 ? fEffB->GetMaximum() : 1) );

      // background efficiency versus signal efficiency
      fEffBvsS = new TH1F( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      fEffBvsS->SetXTitle( "signal eff" );
      fEffBvsS->SetYTitle( "backgr eff" );

      // background rejection (= 1 - eff) versus signal efficiency
      fRejBvsS = new TH1F( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      fRejBvsS->SetXTitle( "signal eff" );
      fRejBvsS->SetYTitle( "backgr rejection (1-eff)" );

      // inverse background efficiency (1/eff) versus signal efficiency
      finvBeffvsSeff = new TH1F( GetTestvarName() + "_invBeffvsSeff",
                                 GetTestvarName() + "", fNbins, 0, 1 );
      finvBeffvsSeff->SetXTitle( "signal eff" );
      finvBeffvsSeff->SetYTitle( "inverse backgr. eff (1/eff)" );

      // optional spline interpolation of the efficiency curves
      if (Use_Splines_for_Eff_) {
         fGraphS  = new TGraph( fEffS );
         fGraphB  = new TGraph( fEffB );
         fSplRefS = new TSpline1( "spline2_signal", fGraphS );
         fSplRefB = new TSpline1( "spline2_background", fGraphB );
         // verify spline sanity
         Tools::CheckSplines( fEffS, fSplRefS );
         Tools::CheckSplines( fEffB, fSplRefB );
      }

      // the root finder inverts effS(cut) through the static IGetEffForRoot
      // hook, which needs "this" registered first
      ResetThisBase();
      RootFinder rootFinder( &IGetEffForRoot, fXmin, fXmax );

      Double_t effB = 0;
      for (Int_t bini=1; bini<=fNbins; bini++) {

         // find cut value corresponding to a given signal efficiency
         Double_t effS = fEffBvsS->GetBinCenter( bini );
         Double_t cut  = rootFinder.Root( effS );

         // retrieve background efficiency for that cut
         if (Use_Splines_for_Eff_)
            effB = fSplRefB->Eval( cut );
         else
            effB = fEffB->GetBinContent( fEffB->FindBin( cut ) );

         // and fill the derived histograms
         fEffBvsS->SetBinContent( bini, effB );
         fRejBvsS->SetBinContent( bini, 1.0-effB );
         if (effB>std::numeric_limits<double>::epsilon())
            finvBeffvsSeff->SetBinContent( bini, 1.0/effB );
      }

      // create spline for the effB-vs-effS curve
      fGrapheffBvsS = new TGraph( fEffBvsS );
      fSpleffBvsS   = new TSpline1( "effBvsS", fGrapheffBvsS );

      // scan for the crossover point where effS = rejB (= 1 - effB)
      Double_t effS, rejB, effS_ = 0, rejB_ = 0;
      Int_t    nbins_ = 5000;
      for (Int_t bini=1; bini<=nbins_; bini++) {
         effS = (bini - 0.5)/Float_t(nbins_);
         rejB = 1.0 - fSpleffBvsS->Eval( effS );
         // sign change brackets the crossing
         if ((effS - rejB)*(effS_ - rejB_) < 0) break;
         effS_ = effS;
         rejB_ = rejB;
      }

      // cut corresponding to the crossover defines the signal-like criterion
      Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
      SetSignalReferenceCut( cut );
   }

   // must exist by now (created in the first pass)
   if (NULL == fSpleffBvsS) return 0.0;

   Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
   Int_t    nbins_ = 1000;

   if (computeArea) {

      // compute the area of the rejection-vs-efficiency plot
      Double_t integral = 0;
      for (Int_t bini=1; bini<=nbins_; bini++) {
         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );
         integral += (1.0 - effB);
      }
      integral /= nbins_;

      return integral;
   }
   else {

      // scan until the background efficiency crosses the reference value
      for (Int_t bini=1; bini<=nbins_; bini++) {
         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );
         if ((effB - effBref)*(effB_ - effBref) < 0) break;
         effS_ = effS;
         effB_ = effB;
      }

      // take the mean between the bin above and the bin below the crossing
      effS = 0.5*(effS + effS_);

      // binomial error estimate (nevtS from the first pass)
      effSerr = 0;
      if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );

      return effS;
   }

   return -1; // not reached
}
Double_t TMVA::MethodBase::GetTrainingEfficiency( TString theString)
{
   // fill background efficiency (rejection) versus signal efficiency plots
   // for the TRAINING sample; returns the training signal efficiency at the
   // background efficiency requested in theString (format: "Efficiency:0.05")

   // parse input string for required background efficiency
   TList* list = Tools::ParseFormatLine( theString );

   // sanity check (fix: also guard against a null list before dereferencing)
   if (list == 0 || list->GetSize() != 2) {
      fLogger << kFATAL << "<GetTrainingEfficiency> Wrong number of arguments"
              << " in string: " << theString
              << " | required format, e.g., Efficiency:0.05" << Endl;
      delete list; // fix: do not leak the parsed list on early return
      return -1;
   }

   // required background efficiency reference
   Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
   delete list; // fix: list was never deleted (memory leak)

   // sanity check
   if (fHistS_highbin->GetNbinsX() != fHistB_highbin->GetNbinsX() ||
       fHistS_plotbin->GetNbinsX() != fHistB_plotbin->GetNbinsX()) {
      fLogger << kFATAL << "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
              << Endl;
      fIsOK = kFALSE;
      return -1.0;
   }

   // use the high-binned histograms' axis range for the efficiency scan
   Double_t xmin = fHistS_highbin->GetXaxis()->GetXmin();
   Double_t xmax = fHistS_highbin->GetXaxis()->GetXmax();

   // first pass? --> create and fill the training efficiency histograms
   Bool_t firstPass = kFALSE;
   if (NULL == fTrainEffS && NULL == fTrainEffB) firstPass = kTRUE;

   if (firstPass) {

      fTrainEffS = new TH1F( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
                             fNbinsH, xmin, xmax );
      fTrainEffB = new TH1F( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
                             fNbinsH, xmin, xmax );

      // cut orientation: +1 means "signal-like above the cut"
      Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;

      // unbinned scan over the training sample
      for (Int_t ievt=0; ievt<Data().GetNEvtTrain(); ievt++) {

         ReadTrainingEvent(ievt);

         // select histogram depending on class
         TH1* theHist = (IsSignalEvent() ? fTrainEffS : fTrainEffB);

         Double_t theVal = this->GetMvaValue();

         TAxis* axis   = theHist->GetXaxis();
         Int_t  maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
         if (sign > 0 && maxbin > fNbinsH) continue; // event outside range: does not count
         if (sign < 0 && maxbin < 1      ) continue; // event outside range: does not count
         if (sign > 0 && maxbin < 1      ) maxbin = 1;
         if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;

         // each event contributes to the cumulative efficiency of all cuts it passes
         if (sign > 0)
            for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin );
         else if (sign < 0)
            for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin );
         else
            fLogger << kFATAL << "<GetTrainingEfficiency> Mismatch in sign" << Endl; // fix: message said <GetEfficiency>
      }

      // renormalise to the maximum (cumulative distributions run from 1 to 0)
      fTrainEffS->Scale( 1.0/(fTrainEffS->GetMaximum() > 0 ? fTrainEffS->GetMaximum() : 1) );
      fTrainEffB->Scale( 1.0/(fTrainEffB->GetMaximum() > 0 ? fTrainEffB->GetMaximum() : 1) );

      // background efficiency and rejection versus signal efficiency
      fTrainEffBvsS = new TH1F( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      fTrainRejBvsS = new TH1F( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );

      // optional spline interpolation of the efficiency curves
      if (Use_Splines_for_Eff_) {
         fGraphTrainS  = new TGraph( fTrainEffS );
         fGraphTrainB  = new TGraph( fTrainEffB );
         fSplTrainRefS = new TSpline1( "spline2_signal", fGraphTrainS );
         fSplTrainRefB = new TSpline1( "spline2_background", fGraphTrainB );
         // verify spline sanity
         Tools::CheckSplines( fTrainEffS, fSplTrainRefS );
         Tools::CheckSplines( fTrainEffB, fSplTrainRefB );
      }

      // the root finder inverts effS(cut) through the static IGetEffForRoot
      // hook, which needs "this" registered first
      ResetThisBase();
      RootFinder rootFinder(&IGetEffForRoot, fXmin, fXmax );

      Double_t effB = 0;
      for (Int_t bini=1; bini<=fNbins; bini++) {

         // find cut value corresponding to a given signal efficiency
         Double_t effS = fTrainEffBvsS->GetBinCenter( bini );
         Double_t cut  = rootFinder.Root( effS );

         // retrieve background efficiency for that cut
         if (Use_Splines_for_Eff_)
            effB = fSplTrainRefB->Eval( cut );
         else
            effB = fTrainEffB->GetBinContent( fTrainEffB->FindBin( cut ) );

         // and fill the derived histograms
         fTrainEffBvsS->SetBinContent( bini, effB );
         fTrainRejBvsS->SetBinContent( bini, 1.0-effB );
      }

      // create spline for the effB-vs-effS curve
      fGraphTrainEffBvsS = new TGraph( fTrainEffBvsS );
      fSplTrainEffBvsS   = new TSpline1( "effBvsS", fGraphTrainEffBvsS );
   }

   // must exist by now (created in the first pass)
   if (NULL == fSplTrainEffBvsS) return 0.0;

   // scan until the background efficiency crosses the reference value
   Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0; // fix: initialize
   Int_t    nbins_ = 1000;
   for (Int_t bini=1; bini<=nbins_; bini++) {
      effS = (bini - 0.5)/Float_t(nbins_);
      effB = fSplTrainEffBvsS->Eval( effS );
      if ((effB - effBref)*(effB_ - effBref) < 0) break;
      effS_ = effS;
      effB_ = effB;
   }

   // take the mean between the bin above and the bin below the crossing
   return 0.5*(effS + effS_);
}
Double_t TMVA::MethodBase::GetSignificance( void ) const
{
   // significance of the MVA: |<S> - <B>| divided by the quadratic sum of
   // the signal and background RMS values; zero if the RMS sum vanishes
   const Double_t quadraticRmsSum = TMath::Sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
   if (quadraticRmsSum <= 0) return 0;
   return TMath::Abs( fMeanS - fMeanB )/quadraticRmsSum;
}
Double_t TMVA::MethodBase::GetSeparation( TH1* histoS, TH1* histoB ) const
{
   // compute "separation" of the two histograms, defined as
   // <s2> = (1/2) Int { (S(x) - B(x))^2/(S(x) + B(x)) } dx

   Double_t xmin = histoS->GetXaxis()->GetXmin();
   Double_t xmax = histoS->GetXaxis()->GetXmax(); // fix: was taken from histoB,
                                                  // which made the check below vacuous

   // sanity check: both histograms must cover the same range
   if (xmin != histoB->GetXaxis()->GetXmin() || xmax != histoB->GetXaxis()->GetXmax()) {
      fLogger << kFATAL << "<GetSeparation> Mismatch in histogram limits: "
              << xmin << " " << histoB->GetXaxis()->GetXmin() << " " // fix: missing separator
              << xmax << " " << histoB->GetXaxis()->GetXmax() << Endl;
   }

   Double_t separation = 0;
   Int_t    nstep  = histoS->GetNbinsX();
   Double_t intBin = (xmax - xmin)/nstep;
   // fix: iterate over the real bins 1..n; the original loop started at bin 0
   // (ROOT's underflow bin) and skipped the last bin
   for (Int_t bin=1; bin<=nstep; bin++) {
      Double_t s = histoS->GetBinContent(bin);
      Double_t b = histoB->GetBinContent(bin);
      if (s + b > 0) separation += 0.5*(s - b)*(s - b)/(s + b);
   }
   separation *= intBin; // bin width completes the numerical integral

   return separation;
}
Double_t TMVA::MethodBase::GetSeparation( PDF* pdfS, PDF* pdfB ) const
{
   // compute "separation" of the two PDFs, defined as
   // <s2> = (1/2) Int { (S(x) - B(x))^2/(S(x) + B(x)) } dx
   //
   // if both pointers are null, the method's own MVA PDFs (fSplS, fSplB) are used

   // sanity check: exactly one null pointer is an error
   // (fix: parenthesised the mixed &&/|| expression; logic unchanged)
   if ((!pdfS && pdfB) || (pdfS && !pdfB))
      fLogger << kFATAL << "<GetSeparation> Mismatch in pdfs" << Endl;
   if (!pdfS) pdfS = fSplS;
   if (!pdfB) pdfB = fSplB;

   Double_t xmin = pdfS->GetXmin();
   Double_t xmax = pdfS->GetXmax();
   // sanity check: both PDFs must cover the same range
   if (xmin != pdfB->GetXmin() || xmax != pdfB->GetXmax()) {
      fLogger << kFATAL << "<GetSeparation> Mismatch in PDF limits: "
              << xmin << " " << pdfB->GetXmin() << " " // fix: missing separator
              << xmax << " " << pdfB->GetXmax() << Endl;
   }

   // numerical integration in 100 steps over the common range
   Double_t separation = 0;
   Int_t    nstep  = 100;
   Double_t intBin = (xmax - xmin)/nstep;
   for (Int_t bin=0; bin<nstep; bin++) {
      Double_t x = (bin + 0.5)*intBin + xmin; // midpoint of the step
      Double_t s = pdfS->GetVal( x );
      Double_t b = pdfB->GetVal( x );
      if (s + b > 0) separation += 0.5*(s - b)*(s - b)/(s + b);
   }
   separation *= intBin; // step width completes the numerical integral

   return separation;
}
Double_t TMVA::MethodBase::GetOptimalSignificance(Double_t SignalEvents,
                                                  Double_t BackgroundEvents,
                                                  Double_t& optimal_significance_value ) const
{
   // scan the significance sqrt(S)*effS/sqrt(effS + (B/S)*effB) as a function
   // of the cut position for the given signal and background event counts;
   // returns the cut of maximum significance, and the maximum significance
   // itself via the reference argument

   Double_t optimal_significance(0);
   Double_t effS(0),effB(0),significance(0);

   // validate inputs first (fix: previously a scratch histogram was allocated
   // before these checks, leaking on the early return below)
   if (SignalEvents <= 0 || BackgroundEvents <= 0) {
      fLogger << kFATAL << "<GetOptimalSignificance> "
              << "Number of signal or background events is <= 0 ==> abort"
              << Endl;
   }
   fLogger << kINFO << "Using ratio SignalEvents/BackgroundEvents = "
           << SignalEvents/BackgroundEvents << Endl;

   if ((fEffS == 0) || (fEffB == 0)) {
      fLogger << kWARNING << "Efficiency histograms empty !" << Endl;
      fLogger << kWARNING << "no optimal cut found, return 0" << Endl;
      return 0;
   }

   // scan the significance as a function of the cut position
   TH1F *temp_histogram = new TH1F("temp", "temp", fNbinsH, fXmin, fXmax );
   for (Int_t bin=1; bin<=fNbinsH; bin++) {
      effS = fEffS->GetBinContent( bin );
      effB = fEffB->GetBinContent( bin );

      // fix: guard against a vanishing denominator (both efficiencies zero),
      // which previously produced NaN bin contents
      Double_t denom = effS + ( BackgroundEvents / SignalEvents) * effB;
      significance = (denom > 0) ? sqrt(SignalEvents)*( effS )/sqrt( denom ) : 0;

      temp_histogram->SetBinContent(bin,significance);
   }

   // find maximum in histogram
   optimal_significance       = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
   optimal_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );

   // clean up the scratch histogram
   delete temp_histogram;

   fLogger << kINFO << "Optimal cut at : " << optimal_significance << Endl;
   fLogger << kINFO << "Optimal significance: " << optimal_significance_value << Endl;

   return optimal_significance;
}
Double_t TMVA::MethodBase::GetmuTransform( TTree *theTree )
{
   // computes the mu-transform of the MVA output: the transformation that
   // renders the background distribution uniform; returns the mean
   // Int x*muS(x) dx of the signal mu-transform (the closer to one, the
   // better the signal/background discrimination)

   // binned Bhat = b/(b+s) distributions for signal and background
   Int_t nbin = 70;
   fHistBhatS = new TH1F( GetTestvarName() + "_BhatS", GetTestvarName() + ": Bhat (S)", nbin, 0.0, 1.0 );
   fHistBhatB = new TH1F( GetTestvarName() + "_BhatB", GetTestvarName() + ": Bhat (B)", nbin, 0.0, 1.0 );

   fHistBhatS->Sumw2();
   fHistBhatB->Sumw2();

   // per-event Bhat values (fix: plain stack vectors instead of needless
   // heap allocation via new/delete)
   vector<Double_t> aBhatB;
   vector<Double_t> aBhatS;

   Float_t x;
   TBranch* br = theTree->GetBranch(GetTestvarName());
   for (Int_t ievt=0; ievt<theTree->GetEntries(); ievt++) {
      ReadEvent(theTree,ievt);
      // re-bind the branch address every iteration
      // NOTE(review): ReadEvent may rebind branch addresses -- confirm before
      // hoisting this out of the loop
      br->SetAddress(&x);
      br->GetEvent(ievt);

      // Bhat = b/(b+s): background probability from the MVA PDFs
      Double_t s = fSplS->GetVal( x );
      Double_t b = fSplB->GetVal( x );
      Double_t aBhat = 0;
      if (b + s > 0) aBhat = b/(b + s);

      if (IsSignalEvent()) {
         aBhatS.push_back ( aBhat );
         fHistBhatS->Fill( aBhat );
      }
      else {
         aBhatB.push_back ( aBhat );
         fHistBhatB->Fill( aBhat );
      }
   }

   // normalize the Bhat histograms
   fHistBhatS->Scale( 1.0/((fHistBhatS->GetEntries() > 0 ? fHistBhatS->GetEntries() : 1) / nbin) );
   fHistBhatB->Scale( 1.0/((fHistBhatB->GetEntries() > 0 ? fHistBhatB->GetEntries() : 1) / nbin) );

   // PDF of the background Bhat distribution; its value is used below as the
   // per-event weight that makes the background uniform
   PDF* yB = new PDF( fHistBhatB, PDF::kSpline2, 100 );

   Int_t nevtS = aBhatS.size();
   Int_t nevtB = aBhatB.size();

   // mu-transform histograms
   Int_t nbinMu = 50;
   fHistMuS = new TH1F( GetTestvarName() + "_muTransform_S",
                        GetTestvarName() + ": mu-Transform (S)", nbinMu, 0.0, 1.0 );
   fHistMuB = new TH1F( GetTestvarName() + "_muTransform_B",
                        GetTestvarName() + ": mu-Transform (B)", nbinMu, 0.0, 1.0 );

   // signal, weighted by 1/yB so that the background becomes uniform
   for (Int_t ievt=0; ievt<nevtS; ievt++) {
      Double_t w = yB->GetVal( aBhatS[ievt] );
      if (w > 0) fHistMuS->Fill( 1.0 - aBhatS[ievt], 1.0/w );
   }
   // background, same weighting (cross check: should come out uniform)
   for (Int_t ievt=0; ievt<nevtB; ievt++) {
      Double_t w = yB->GetVal( aBhatB[ievt] );
      if (w > 0) fHistMuB->Fill( 1.0 - aBhatB[ievt], 1.0/w );
   }

   // normalize mu-transform histograms
   Tools::NormHist( fHistMuS );
   Tools::NormHist( fHistMuB );

   // determine the mean signal mu-transform by numerical integration
   PDF* thePdf = new PDF( fHistMuS, PDF::kSpline2 );
   Double_t intS = 0;
   Int_t    nstp = 10000;
   for (Int_t istp=0; istp<nstp; istp++) {
      Double_t x = (istp + 0.5)/Double_t(nstp);
      intS += x*thePdf->GetVal( x );
   }
   intS /= Double_t(nstp);

   // clean up temporary PDFs (the member histograms are kept for plotting)
   delete yB;
   delete thePdf;

   return intS;
}
void TMVA::MethodBase::Statistics( Types::ETreeType treeType, const TString& theVarName,
                                   Double_t& meanS, Double_t& meanB,
                                   Double_t& rmsS,  Double_t& rmsB,
                                   Double_t& xmin,  Double_t& xmax,
                                   Bool_t    norm )
{
   // calculates the weighted mean, RMS, minimum and maximum of the given
   // variable, separately for signal and background, on the requested
   // (testing or training) sample; results are returned via the references

   Long64_t entries = ( (treeType == Types::kTesting ) ? Data().GetNEvtTest() :
                        (treeType == Types::kTraining) ? Data().GetNEvtTrain() : -1 );

   // sanity check
   if (entries <=0)
      fLogger << kFATAL << "<CalculateEstimator> Wrong tree type: " << treeType << Endl;

   // index of the variable to analyse
   UInt_t varIndex = Data().FindVar( theVarName );

   // fix: removed the varVecS/varVecB scratch arrays and their event counters;
   // they were filled but never read (two O(entries) allocations per call)

   // initialize accumulators
   xmin = +1e20;
   xmax = -1e20;
   meanS = 0;
   meanB = 0;
   rmsS  = 0;
   rmsB  = 0;
   Double_t sumwS = 0, sumwB = 0;

   // accumulate weighted first and second moments per class
   for (Int_t i = 0; i < entries; i++) {

      if (treeType == Types::kTesting ) ReadTestEvent(i);
      else                              ReadTrainingEvent(i);

      Double_t theVar = (norm) ? GetEventValNormalised(varIndex) : GetEventVal(varIndex);
      Double_t weight = GetEventWeight();

      if (IsSignalEvent()) {
         sumwS += weight;
         meanS += weight*theVar;
         rmsS  += weight*theVar*theVar;
      }
      else {
         sumwB += weight;
         meanB += weight*theVar;
         rmsB  += weight*theVar*theVar;
      }

      xmin = TMath::Min( xmin, theVar );
      xmax = TMath::Max( xmax, theVar );
   }

   // compute mean and RMS; guard against empty classes (division by zero) and
   // against a slightly negative variance from floating-point rounding
   if (sumwS > 0) {
      meanS = meanS/sumwS;
      rmsS  = TMath::Sqrt( TMath::Max( 0.0, rmsS/sumwS - meanS*meanS ) );
   }
   if (sumwB > 0) {
      meanB = meanB/sumwB;
      rmsB  = TMath::Sqrt( TMath::Max( 0.0, rmsB/sumwB - meanB*meanB ) );
   }
}
void TMVA::MethodBase::WriteEvaluationHistosToFile( TDirectory* targetDir )
{
   // writes all evaluation histograms that exist to file; output goes into
   // the given directory if one is provided, otherwise into the method's
   // base directory

   if (targetDir != 0) targetDir->cd();
   else                BaseDir()->cd();

   // plain evaluation histograms, listed in the order they are written
   TH1* evalHistos[] = { fHistS_plotbin,   fHistB_plotbin,
                         fProbaS_plotbin,  fProbaB_plotbin,
                         fRarityS_plotbin, fRarityB_plotbin,
                         fHistS_highbin,   fHistB_highbin,
                         fEffS,            fEffB,
                         fEffBvsS,         fRejBvsS,
                         finvBeffvsSeff,
                         fHistBhatS,       fHistBhatB,
                         fHistMuS,         fHistMuB };
   const Int_t nEvalHistos = Int_t(sizeof(evalHistos)/sizeof(evalHistos[0]));

   for (Int_t ih=0; ih<nEvalHistos; ih++)
      if (evalHistos[ih] != 0) evalHistos[ih]->Write();

   // the MVA PDFs each carry three histograms
   if (fMVAPdfS != 0) {
      fMVAPdfS->GetOriginalHist()->Write();
      fMVAPdfS->GetSmoothedHist()->Write();
      fMVAPdfS->GetPDFHist()->Write();
   }
   if (fMVAPdfB != 0) {
      fMVAPdfB->GetOriginalHist()->Write();
      fMVAPdfB->GetSmoothedHist()->Write();
      fMVAPdfB->GetPDFHist()->Write();
   }
}
void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const
{
   // create a standalone C++ reader class for this classifier, so that the
   // trained response can be used without ROOT/TMVA
   //
   // default file name: fileDir/jobName_methodTitle.class.C

   TString classFileName = "";
   if (theClassFileName == "")
      classFileName = fFileDir + "/" + fJobName + "_" + GetMethodTitle() + ".class.C";
   else
      classFileName = theClassFileName;

   TString className = TString("Read") + GetMethodTitle();
   // fix: removed unused local "tfname" (dead copy of classFileName)

   fLogger << kINFO << "Creating standalone response class : "
           << Tools::Color("lightblue") << classFileName << Tools::Color("reset") << Endl;

   ofstream fout( classFileName );
   if (!fout.good()) { // file could not be opened --> abort via kFATAL
      fLogger << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
   }

   // preamble of the generated file
   fout << "// Class: " << className << endl;
   fout << "// Automatically generated by MethodBase::MakeClass" << endl << "//" << endl;
   fout << endl;

   // dump the full configuration state as a comment block
   fout << "/* configuration options =====================================================" << endl << endl;
   WriteStateToStream( fout, kTRUE );
   fout << endl;
   fout << "============================================================================ */" << endl;

   // generated includes
   fout << "" << endl;
   fout << "#include <vector>" << endl;
   fout << "#include <cmath>" << endl;
   fout << "#include <string>" << endl;
   fout << "#include <iostream>" << endl;
   fout << "" << endl;

   // method-specific declarations must come before the main class
   this->MakeClassSpecificHeader( fout, className );

   // abstract reader interface
   fout << "#ifndef IClassifierReader__def" << endl;
   fout << "#define IClassifierReader__def" << endl;
   fout << endl;
   fout << "class IClassifierReader {" << endl;
   fout << endl;
   fout << " public:" << endl;
   fout << endl;
   fout << " // constructor" << endl;
   fout << " IClassifierReader() {}" << endl;
   fout << " virtual ~IClassifierReader() {}" << endl;
   fout << endl;
   fout << " // return classifier response" << endl;
   fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << endl;
   fout << "};" << endl;
   fout << endl;
   fout << "#endif" << endl;
   fout << endl;

   // the concrete reader class
   fout << "class " << className << " : public IClassifierReader {" << endl;
   fout << endl;
   fout << " public:" << endl;
   fout << endl;
   fout << " // constructor" << endl;
   fout << " " << className << "( std::vector<std::string>& theInputVars ) " << endl;
   fout << " : IClassifierReader()," << endl;
   fout << " fClassName( \"" << className << "\" )," << endl;
   fout << " fStatusIsClean( true )," << endl;
   fout << " fNvars( " << GetNvar() << " )," << endl;
   fout << " fIsNormalised( " << (IsNormalised() ? "true" : "false") << " )" << endl;
   fout << " { " << endl;
   fout << " // the training input variables" << endl;
   fout << " const char* inputVars[] = { ";
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      fout << "\"" << GetOriginalVarName(ivar) << "\"";
      if (ivar<GetNvar()-1) fout << ", ";
   }
   fout << " };" << endl;
   fout << endl;

   // generated sanity checks on the caller-supplied variable list
   fout << " // sanity checks" << endl;
   fout << " if (theInputVars.size() <= 0) {" << endl;
   fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << endl;
   fout << " fStatusIsClean = false;" << endl;
   fout << " }" << endl;
   fout << endl;
   fout << " if (theInputVars.size() != fNvars) {" << endl;
   fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << endl;
   fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << endl;
   fout << " fStatusIsClean = false;" << endl;
   fout << " }" << endl;
   fout << endl;
   fout << " // validate input variables" << endl;
   fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << endl;
   fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << endl;
   fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << endl;
   fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << endl;
   fout << " fStatusIsClean = false;" << endl;
   fout << " }" << endl;
   fout << " }" << endl;
   fout << endl;

   // bake the per-variable normalisation range into the generated class
   fout << " // initialize min and max vectors (for normalisation)" << endl;
   for (Int_t ivar = 0; ivar < GetNvar(); ivar++) {
      fout << " fVmin[" << ivar << "] = " << setprecision(15) << GetXmin( ivar ) << ";" << endl;
      fout << " fVmax[" << ivar << "] = " << setprecision(15) << GetXmax( ivar ) << ";" << endl;
   }
   fout << endl;

   // bake the input variable types into the generated class
   fout << " // initialize input variable types" << endl;
   for (Int_t ivar=0; ivar<GetNvar(); ivar++) {
      fout << " fType[" << ivar << "] = \'" << Data().GetVarType(ivar) << "\';" << endl;
   }
   fout << endl;
   fout << " // initialize constants" << endl;
   fout << " Initialize();" << endl;
   fout << endl;
   if (fVariableTransform != Types::kNone) {
      fout << " // initialize transformation" << endl;
      fout << " InitTransform();" << endl;
   }
   fout << " }" << endl;
   fout << endl;
   fout << " // destructor" << endl;
   fout << " virtual ~" << className << "() {" << endl;
   fout << " Clear(); // method-specific" << endl;
   fout << " }" << endl;
   fout << endl;

   // the generated GetMvaValue entry point
   fout << " // the classifier response" << endl;
   fout << " // \"inputValues\" is a vector of input values in the same order as the " << endl;
   fout << " // variables given to the constructor" << endl;
   fout << " double GetMvaValue( const std::vector<double>& inputValues ) const" << endl;
   fout << " {" << endl;
   fout << " // classifier response value" << endl;
   fout << " double retval = 0;" << endl;
   fout << endl;
   fout << " // classifier response, sanity check first" << endl;
   fout << " if (!fStatusIsClean) {" << endl;
   fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << endl;
   fout << " << \" because status is dirty\" << std::endl;" << endl;
   fout << " retval = 0;" << endl;
   fout << " }" << endl;
   fout << " else {" << endl;
   fout << " if (IsNormalised()) {" << endl;
   fout << " // normalise variables" << endl;
   fout << " std::vector<double> iV;" << endl;
   fout << " int ivar = 0;" << endl;
   fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << endl;
   fout << " varIt != inputValues.end(); varIt++, ivar++) {" << endl;
   fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << endl;
   fout << " }" << endl;
   if (fVariableTransform != Types::kNone && GetMethodType() != Types::kLikelihood )
      fout << " Transform( iV, 0 );" << endl;
   fout << " retval = GetMvaValue__( iV );" << endl;
   fout << " }" << endl;
   fout << " else {" << endl;
   if (fVariableTransform != Types::kNone && GetMethodType() != Types::kLikelihood ) {
      // fix: "inputValues" is a const reference while Transform() takes a
      // non-const vector (see its declaration generated below), so the
      // generated code could not compile; act on a modifiable copy instead
      fout << " std::vector<double> iV( inputValues );" << endl;
      fout << " Transform( iV, 0 );" << endl;
      fout << " retval = GetMvaValue__( iV );" << endl;
   }
   else {
      fout << " retval = GetMvaValue__( inputValues );" << endl;
   }
   fout << " }" << endl;
   fout << " }" << endl;
   fout << endl;
   fout << " return retval;" << endl;
   fout << " }" << endl;
   fout << endl;

   // private helpers of the generated class
   fout << " private:" << endl;
   fout << endl;
   fout << " // method-specific destructor" << endl;
   fout << " void Clear();" << endl;
   fout << endl;
   if (fVariableTransform != Types::kNone) {
      fout << " // input variable transformation" << endl;
      GetVarTransform().MakeFunction(fout, className,1);
      fout << " void InitTransform();" << endl;
      fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << endl;
      fout << endl;
   }
   fout << " // common member variables" << endl;
   fout << " const char* fClassName;" << endl;
   fout << " bool fStatusIsClean;" << endl;
   fout << endl;
   fout << " const size_t fNvars;" << endl;
   fout << " size_t GetNvar() const { return fNvars; }" << endl;
   fout << " char GetType( int ivar ) const { return fType[ivar]; }" << endl;
   fout << endl;
   fout << " // normalisation of input variables" << endl;
   fout << " const bool fIsNormalised;" << endl;
   fout << " bool IsNormalised() const { return fIsNormalised; }" << endl;
   fout << " double fVmin[" << GetNvar() << "];" << endl;
   fout << " double fVmax[" << GetNvar() << "];" << endl;
   fout << " double NormVariable( double x, double xmin, double xmax ) const {" << endl;
   fout << " // normalise to output range: [-1, 1]" << endl;
   fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << endl;
   fout << " }" << endl;
   fout << endl;
   fout << " // type of input variable: 'F' or 'I'" << endl;
   fout << " char fType[" << GetNvar() << "];" << endl;
   fout << endl;
   fout << " // initialize internal variables" << endl;
   fout << " void Initialize();" << endl;
   fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << endl;
   fout << "" << endl;
   fout << " // private members (method specific)" << endl;

   // the classifier-specific implementation and, if needed, the transformation body
   MakeClassSpecific( fout, className );
   if (fVariableTransform != Types::kNone) GetVarTransform().MakeFunction(fout, className,2);

   fout.close();
}
// static pointer to the "current" MethodBase instance; set via ResetThisBase
// and used by the static root-finder callback IGetEffForRoot to reach the
// object being evaluated
TMVA::MethodBase* TMVA::MethodBase::fgThisBase = NULL;
Double_t TMVA::MethodBase::IGetEffForRoot( Double_t theCut )
{
   // static interface function for the root finder: forwards the call to the
   // MethodBase instance currently registered in fgThisBase
   MethodBase* currentBase = MethodBase::GetThisBase();
   return currentBase->GetEffForRoot( theCut );
}
Double_t TMVA::MethodBase::GetEffForRoot( Double_t theCut )
{
   // returns the signal efficiency as a function of the cut value, read from
   // the spline interpolation if enabled, otherwise from the binned histogram
   Double_t retval = Use_Splines_for_Eff_
      ? fSplRefS->Eval( theCut )
      : fEffS->GetBinContent( fEffS->FindBin( theCut ) );

   // pin the values at the spectrum boundaries to their exact limits,
   // depending on the cut orientation
   const Double_t eps = 1.0e-5;
   if      (theCut - fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
   else if (fXmax - theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;

   return retval;
}
void TMVA::MethodBase::WriteMonitoringHistosToFile( void ) const
{
   // default implementation: does nothing but log; concrete MVA methods
   // override this to write their method-specific monitoring histograms
   fLogger << kINFO << "No monitoring histograms written" << Endl;
}
TString TMVA::MethodBase::GetTrainingTMVAVersionString() const
{
   // decode the packed TMVA version integer (one byte per field, 0xAABBCC)
   // into the human-readable form "a.b.c"
   UInt_t code = GetTrainingTMVAVersionCode();
   UInt_t a = (code >> 16) & 0xff;
   UInt_t b = (code >>  8) & 0xff;
   UInt_t c =  code        & 0xff;
   return TString(Form("%i.%i.%i",a,b,c));
}
TString TMVA::MethodBase::GetTrainingROOTVersionString() const
{
   // decode the packed ROOT version integer (one byte per field, 0xAABBCC)
   // into the human-readable form "a.bb/cc"
   UInt_t code = GetTrainingROOTVersionCode();
   UInt_t a = (code >> 16) & 0xff;
   UInt_t b = (code >>  8) & 0xff;
   UInt_t c =  code        & 0xff;
   return TString(Form("%i.%02i/%02i",a,b,c));
}
// This page has been automatically generated. If you have any comments or suggestions about the page layout send a mail to ROOT support, or contact the developers with any questions or problems regarding ROOT.