library: libTMVA #include "MethodCuts.h" |
void | CreateVariablePDFs() |
void | GetEffsfromPDFs(Double_t* cutMin, Double_t* cutMax, Double_t& effS, Double_t& effB) |
void | GetEffsfromSelection(Double_t* cutMin, Double_t* cutMax, Double_t& effS, Double_t& effB) |
void | InitCuts() |
void | MatchCutsToPars(Double_t*, Double_t*, Double_t*) |
void | MatchParsToCuts(const vector<Double_t>&, Double_t*, Double_t*) |
void | MatchParsToCuts(Double_t*, Double_t*, Double_t*) |
Bool_t | SanityChecks() |
enum EConstrainType { | kConstrainEffS | |
kConstrainEffB | ||
}; | ||
enum EFitMethodType { | kUseMonteCarlo | |
kUseGeneticAlgorithm | ||
kUseSimulatedAnnealing | ||
}; | ||
enum EEffMethod { | kUseEventSelection | |
kUsePDFs | ||
}; | ||
enum EFitParameters { | kNotEnforced | |
kForceMin | ||
kForceMax | ||
kForceSmart | ||
kForceVerySmart | ||
}; | ||
enum TMVA::MethodBase::EWeightFileType { | kROOT | |
kTEXT | ||
}; | ||
enum TMVA::MethodBase::ECutOrientation { | kNegative | |
kPositive | ||
}; | ||
enum TObject::EStatusBits { | kCanDelete | |
kMustCleanup | ||
kObjInCanvas | ||
kIsReferenced | ||
kHasUUID | ||
kCannotPick | ||
kNoContextMenu | ||
kInvalidObject | ||
}; | ||
enum TObject::[unnamed] { | kIsOnHeap | |
kNotDeleted | ||
kZombie | ||
kBitMask | ||
kSingleKey | ||
kOverwrite | ||
kWriteDelete | ||
}; |
TMVA::Ranking* | TMVA::MethodBase::fRanking | ranking |
vector<TString>* | TMVA::MethodBase::fInputVars | vector of input variables used in MVA |
Bool_t | TMVA::MethodBase::fIsOK | status of sanity checks |
TH1* | TMVA::MethodBase::fHistS_plotbin | MVA plots used for graphics representation (signal) |
TH1* | TMVA::MethodBase::fHistB_plotbin | MVA plots used for graphics representation (background) |
TH1* | TMVA::MethodBase::fHistS_highbin | MVA plots used for efficiency calculations (signal) |
TH1* | TMVA::MethodBase::fHistB_highbin | MVA plots used for efficiency calculations (background) |
TH1* | TMVA::MethodBase::fEffS | efficiency plot (signal) |
TH1* | TMVA::MethodBase::fEffB | efficiency plot (background) |
TH1* | TMVA::MethodBase::fEffBvsS | background efficiency versus signal efficiency |
TH1* | TMVA::MethodBase::fRejBvsS | background rejection (=1-eff.) versus signal efficiency |
TH1* | TMVA::MethodBase::fHistBhatS | working histograms needed for mu-transform (signal) |
TH1* | TMVA::MethodBase::fHistBhatB | working histograms needed for mu-transform (background) |
TH1* | TMVA::MethodBase::fHistMuS | mu-transform (signal) |
TH1* | TMVA::MethodBase::fHistMuB | mu-transform (background) |
TH1* | TMVA::MethodBase::fTrainEffS | Training efficiency plot (signal) |
TH1* | TMVA::MethodBase::fTrainEffB | Training efficiency plot (background) |
TH1* | TMVA::MethodBase::fTrainEffBvsS | Training background efficiency versus signal efficiency |
TH1* | TMVA::MethodBase::fTrainRejBvsS | Training background rejection (=1-eff.) versus signal efficiency |
Double_t | TMVA::MethodBase::fX | |
Double_t | TMVA::MethodBase::fMode | |
TGraph* | TMVA::MethodBase::fGraphS | graphs used for splines for efficiency (signal) |
TGraph* | TMVA::MethodBase::fGraphB | graphs used for splines for efficiency (background) |
TGraph* | TMVA::MethodBase::fGrapheffBvsS | graphs used for splines for signal eff. versus background eff. |
TMVA::PDF* | TMVA::MethodBase::fSplS | PDFs of MVA distribution (signal) |
TMVA::PDF* | TMVA::MethodBase::fSplB | PDFs of MVA distribution (background) |
TSpline* | TMVA::MethodBase::fSpleffBvsS | splines for signal eff. versus background eff. |
TGraph* | TMVA::MethodBase::fGraphTrainS | graphs used for splines for training efficiency (signal) |
TGraph* | TMVA::MethodBase::fGraphTrainB | graphs used for splines for training efficiency (background) |
TGraph* | TMVA::MethodBase::fGraphTrainEffBvsS | graphs used for splines for training signal eff. versus background eff. |
TMVA::PDF* | TMVA::MethodBase::fSplTrainS | PDFs of training MVA distribution (signal) |
TMVA::PDF* | TMVA::MethodBase::fSplTrainB | PDFs of training MVA distribution (background) |
TSpline* | TMVA::MethodBase::fSplTrainEffBvsS | splines for training signal eff. versus background eff. |
Int_t | TMVA::MethodBase::fNbins | number of bins in representative histograms |
Int_t | TMVA::MethodBase::fNbinsH | number of bins in evaluation histograms |
TMVA::MethodBase::ECutOrientation | TMVA::MethodBase::fCutOrientation | +1 if Sig>Bkg, -1 otherwise |
TMVA::TSpline1* | TMVA::MethodBase::fSplRefS | helper splines for RootFinder (signal) |
TMVA::TSpline1* | TMVA::MethodBase::fSplRefB | helper splines for RootFinder (background) |
TMVA::TSpline1* | TMVA::MethodBase::fSplTrainRefS | helper splines for RootFinder (signal) |
TMVA::TSpline1* | TMVA::MethodBase::fSplTrainRefB | helper splines for RootFinder (background) |
TMVA::OptionBase* | TMVA::MethodBase::fLastDeclaredOption | last declared option |
TList | TMVA::MethodBase::fListOfOptions | option list |
TMVA::MsgLogger | TMVA::MethodBase::fLogger | message logger |
TMVA::MethodCuts::EConstrainType | fConstrainType | |
TString | fFitMethodS | chosen fit method (string) |
TMVA::MethodCuts::EFitMethodType | fFitMethod | chosen fit method |
TString | fEffMethodS | chosen efficiency calculation method (string) |
TMVA::MethodCuts::EEffMethod | fEffMethod | chosen efficiency calculation method |
vector<EFitParameters>* | fFitParams | vector for series of fit methods |
Double_t | fTestSignalEff | used to test optimized signal efficiency |
Double_t | fEffSMin | used to test optimized signal efficiency |
Double_t | fEffSMax | used to test optimized signal efficiency |
TMVA::BinarySearchTree* | fBinaryTreeS | |
TMVA::BinarySearchTree* | fBinaryTreeB | |
Int_t | fGA_nsteps | GA settings: number of steps |
Int_t | fGA_cycles | GA settings: number of pre-calc steps |
Int_t | fGA_popSize | GA settings: population size |
Int_t | fGA_SC_steps | GA settings: SC_steps |
Int_t | fGA_SC_offsteps | GA settings: SC_offsteps |
Double_t | fGA_SC_factor | GA settings: SC_factor |
Int_t | fSA_MaxCalls | max number of FCN calls |
Double_t | fSA_TemperatureGradient | starting value for temperature gradient |
Bool_t | fSA_UseAdaptiveTemperature | compute temperature steps on the fly |
Double_t | fSA_InitialTemperature | initial temperature (depends on FCN) |
Double_t | fSA_MinTemperature | minimum temperature before SA quit |
Double_t | fSA_Eps | relative required FCN accuracy at minimum |
Int_t | fSA_NFunLoops | number of FCN loops |
Int_t | fSA_NEps | test parameter |
Int_t | fNRandCuts | number of random cut samplings |
Double_t** | fCutMin | minimum requirement |
Double_t** | fCutMax | maximum requirement |
Double_t* | fTmpCutMin | temporary minimum requirement |
Double_t* | fTmpCutMax | temporary maximum requirement |
TString | fAllVars | |
TString | fAllVarsI[10] | |
Int_t | fNpar | number of parameters in fit (default: 2*Nvar) |
Double_t | fEffRef | reference efficiency |
vector<Int_t>* | fRangeSign | used to match cuts to fit parameters (and vice versa) |
TRandom* | fRandom | random generator for MC optimisation method |
vector<Double_t>* | fMeanS | means of variables (signal) |
vector<Double_t>* | fMeanB | means of variables (background) |
vector<Double_t>* | fRmsS | RMSs of variables (signal) |
vector<Double_t>* | fRmsB | RMSs of variables (background) |
vector<Double_t>* | fXmin | minimum values of variables |
vector<Double_t>* | fXmax | maximum values of variables |
TH1* | fEffBvsSLocal | intermediate eff. background versus eff signal histo |
vector<TH1*>* | fVarHistS | reference histograms (signal) |
vector<TH1*>* | fVarHistB | reference histograms (background) |
vector<TH1*>* | fVarHistS_smooth | smoothed reference histograms (signal) |
vector<TH1*>* | fVarHistB_smooth | smoothed reference histograms (background) |
vector<PDF*>* | fVarPdfS | reference PDFs (signal) |
vector<PDF*>* | fVarPdfB | reference PDFs (background) |
static TMVA::MethodCuts* | fgThisCuts | used for function reference (GA) |
/* Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular minimum and maximum requirements.
Also implemented is a "decorrelated/diagonalized cuts approach", which improves on the uncorrelated cuts ansatz by linearly transforming the input variables into a diagonal space, using the square-root of the covariance matrix.
Other optimisation criteria, such as maximising the signal significance-squared, S^2/(S+B), with S and B being the signal and background yields, correspond to a particular point in the optimised background rejection versus signal efficiency curve. This working point requires the knowledge of the expected yields, which is not the case in general. Note also that for rare signals, Poissonian statistics should be used, which modifies the significance criterion.
The rectangular cut of a volume in the variable space is performed using a binary tree to sort the training events. This provides a significant reduction in computing time (up to several orders of magnitude, depending on the complexity of the problem at hand).
Technically, optimisation is achieved in TMVA by two methods: Monte Carlo (MC) sampling of the cut values, and a Genetic Algorithm (GA).
Attempts to use Minuit fits (Simplex or Migrad) instead have not shown superior results, and often failed due to convergence at local minima.
The tests we have performed so far showed that in generic applications, the GA is superior to MC sampling, and hence GA is the default method. It is worthwhile trying both anyway.
Decorrelated (or "diagonalized") Cuts
See class description for Method Likelihood for a detailed explanation. */
standard constructor ---------------------------------------------------------------------------------- format of option string: "OptMethod:EffMethod:Option_var1:...:Option_varn:Decorr" "OptMethod" can be: - "GA" : Genetic Algorithm (recommended) - "SA" : Simulated Annealing - "MC" : Monte-Carlo optimization "EffMethod" can be: - "EffSel": compute efficiency by event counting - "EffPDF": compute efficiency from PDFs === For "GA" method ====== "Option_var1++" are (see GA for explanation of parameters): - fGA_nsteps - fGA_cycles - fGA_popSize - fGA_SC_steps - fGA_SC_offsteps - fGA_SC_factor === For "SA" method ====== "Option_var1++" are (see SA for explanation of parameters): - fSA_MaxCalls - fSA_TemperatureGradient - fSA_UseAdaptiveTemperature - fSA_InitialTemperature - fSA_MinTemperature - fSA_Eps - fSA_NFunLoops - fSA_NEps === For "MC" method ====== "Option_var1" is number of random samples "Option_var2++" can be - "FMax" : ForceMax (the max cut is fixed to maximum of variable i) - "FMin" : ForceMin (the min cut is fixed to minimum of variable i) - "FSmart": ForceSmart (the min or max cut is fixed to min/max, based on mean value) - Adding "All" to "option_vari", eg, "AllFSmart" will use this option for all variables - if "option_vari" is empty (== ""), no assumptions on cut min/max are made "Decorr" can be: - omitted : Decorrelation not used - "D" : Decorrelates variables, evaluation events decorrelated with signal decorrelation matrix - "DS" : Decorrelates variables, evaluation events decorrelated with signal decorrelation matrix - "DB" : Decorrelates variables, evaluation events decorrelated with background decorrelation matrix ----------------------------------------------------------------------------------
construction from weight file
define the options (their key words) that can be set in the option string known options: Method <string> Minimization method available values are: MC Monte Carlo <default> GA Genetic Algorithm SA Simulated annealing EffMethod <string> Efficiency selection method available values are: EffSel <default> EffPDF MC_NRandCuts <int> Number of random cuts to estimate the efficiency for the MC method MC_AllVarProp <string> Property of all variables for the MC method available values are: AllNotEnforced <default> AllFMax AllFMin AllFSmart AllFVerySmart MC_Var1Prop <string> Property of variable 1 for the MC method (taking precedence over the global setting). The same values as for the global option are available. Variables 1..10 can be set this way. GA_nsteps <int> Number of steps for the genetic algorithm GA_cycles <int> Number of generations for the genetic algorithm GA_popSize <int> Size of the population for the genetic algorithm GA_SC_steps <int> Number of steps for the genetic algorithm GA_SC_offsteps <int> for the genetic algorithm GA_SC_factor <float> for the genetic algorithm SA_MaxCalls <int> maximum number of calls for simulated annealing SA_TemperatureGradient <float> temperature gradient for simulated annealing SA_UseAdaptiveTemperature <bool> use of adaptive temperature for simulated annealing SA_InitialTemperature <float> initial temperature for simulated annealing SA_MinTemperature <float> minimum temperature for simulated annealing SA_Eps <float> relative required FCN accuracy at minimum for simulated annealing SA_NFunLoops <int> number of loops for simulated annealing SA_NEps <int> number of epochs for simulated annealing
returns estimator for "cut fitness" used by GA
there are two requirements:
1) the signal efficiency must be equal to the required one in the
efficiency scan
2) the background efficiency must be as small as possible
the requirement 1) has priority over 2)
translates parameters into cuts
translates cuts into parameters
compute signal and background efficiencies from PDFs for given cut sample
compute signal and background efficiencies from event counting for given cut sample
- overloaded function to create background efficiency (rejection) versus signal efficiency plot (first call of this function) - the function returns the signal efficiency at background efficiency indicated in theString "theString" must have two entries: [0]: "Efficiency" [1]: the value of background efficiency at which the signal efficiency is to be returned
- overloaded function to create background efficiency (rejection) versus signal efficiency plot (first call of this function) - the function returns the signal efficiency at background efficiency indicated in theString "theString" must have two entries: [0]: "Efficiency" [1]: the value of background efficiency at which the signal efficiency is to be returned
the definition of fit parameters can be different from the actual cut requirements; these functions provide the matching