#ifndef ROOT_TMVA_MethodBDT
#define ROOT_TMVA_MethodBDT
inline const std::vector<TMVA::DecisionTree*> & GetForest() const;
#define ClassDef(name, id)
TMVA::Event: class that contains all the data information.
TMVA::DecisionTree: implementation of a Decision Tree.
TMVA::MethodBDT: analysis of Boosted Decision Trees.
Double_t fCbb
Cost factor for AdaCost: true background selected as background.
std::vector< Double_t > fHighBkgCut
void SetBaggedSampleFraction(Double_t f)
Bool_t fBaggedGradBoost
turn bagging in combination with grad boost on/off
DecisionTree::EPruneMethod fPruneMethod
method used for pruning
std::vector< const TMVA::Event * > fEventSample
the training events
void Init(void)
Common initialisation with defaults for the BDT-Method.
Double_t fHuberQuantile
the quantile for the Huber loss function in BDT regression.
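For orientation: assuming TMVA follows the standard Friedman (2001) definition, the Huber loss that this quantile parameterizes reads, with \(\delta\) the fHuberQuantile-quantile of the absolute residuals \(|y - F(x)|\),
\[
L(y, F) =
\begin{cases}
\tfrac{1}{2}\,(y - F)^2 & \text{for } |y - F| \le \delta,\\[2pt]
\delta\,\bigl(|y - F| - \tfrac{\delta}{2}\bigr) & \text{otherwise.}
\end{cases}
\]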
static const Int_t fgDebugLevel
debug level determining some printout/control plots etc.
Bool_t fBaggedBoost
turn bagging in combination with boost on/off
TString fMinNodeSizeS
string containing min percentage of training events in node
void BoostMonitor(Int_t iTree)
Fills the ROC integral vs. tree number from the test sample for the monitoring plots during training.
const std::vector< Float_t > & GetMulticlassValues()
Get the multiclass MVA response for the BDT classifier.
std::map< const TMVA::Event *, LossFunctionEventInfo > fLossFunctionEventInfo
map event to true value, predicted value, and weight used by different loss functions for BDT regression.
std::vector< const TMVA::Event * > * fTrainSample
pointer to the sample actually used in training (fEventSample or fSubSample)
std::vector< Bool_t > fIsHighSigCut
Double_t AdaBoostR2(std::vector< const TMVA::Event * > &, DecisionTree *dt)
Adaptation of AdaBoost to regression problems (see H. Drucker, 1997).
Double_t PrivateGetMvaValue(const TMVA::Event *ev, Double_t *err=nullptr, Double_t *errUpper=nullptr, UInt_t useNTrees=0)
Return the MVA value (range [-1;1]) that classifies the event according to the majority vote from the total number of decision trees.
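As a sketch, this is the conventional weighted majority vote: with \(w_i\) the boost weights (cf. GetBoostWeights()) and \(h_i(x) \in \{-1, +1\}\) the response of the \(i\)-th tree,
\[
y_{\mathrm{BDT}}(x) = \frac{\sum_{i=1}^{N} w_i\, h_i(x)}{\sum_{i=1}^{N} w_i}.
\]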
void MakeClassSpecific(std::ostream &, const TString &) const
Make ROOT-independent C++ class for classifier response (classifier-specific implementation).
Bool_t fPairNegWeightsGlobal
pair events with negative and positive weights in the training sample and "annihilate" them
Bool_t fSkipNormalization
true for skipping normalization at initialization of trees
Bool_t fUseExclusiveVars
individual variables already used in the Fisher criterion are not analysed individually again for node splitting
UInt_t fUseNvars
the number of variables used in the randomised tree splitting
Double_t fCts_sb
Cost factor for AdaCost: true signal selected as background.
void GetHelpMessage() const
Get help message text.
LossFunctionBDT * fRegressionLossFunctionBDTG
void DeterminePreselectionCuts(const std::vector< const TMVA::Event * > &eventSample)
Find useful preselection cuts that will be applied before the decision tree training.
Int_t fNCuts
number of grid points in variable range used for finding the optimal cut in node splitting
Double_t GradBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt, UInt_t cls=0)
Calculate the desired response value for each region.
const Ranking * CreateRanking()
Compute ranking of input variables.
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
Set the tuning parameters according to the argument.
void SetAdaBoostBeta(Double_t b)
Bool_t fUsePoissonNvars
use "fUseNvars" not as fixed number but as mean of a poisson distr. in each split
Float_t fMinNodeSize
min percentage of training events in node
Bool_t fDoBoostMonitor
create control plot with ROC integral vs tree number
Double_t AdaCost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
The AdaCost boosting algorithm takes a simple cost matrix (currently fixed for all events) and takes the misclassification costs into account when boosting.
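The four cost factors fCss, fCts_sb, fCtb_ss and fCbb fill that matrix; a sketch of its layout (each entry in the range [0,1]):

                    true signal    true background
     sel. signal |     Css            Ctb_ss
     sel. bkg    |     Cts_sb         Cbb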
void DeclareOptions()
Define the options (their key words).
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr)
Bool_t fTrainWithNegWeights
true if there are negative event weights that are not ignored in the training
TString fRegressionLossFunctionBDTGS
the option string determining the loss function for BDT regression
std::vector< double > fBoostWeights
the weights applied in the individual boosts
Bool_t fDoPreselection
do or do not perform automatic pre-selection of 100% efficient cuts
std::vector< Double_t > fVariableImportance
the relative importance of the different variables
Int_t fMinNodeEvents
min number of events in node
std::vector< Bool_t > fIsLowBkgCut
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
Call the Optimizer with the set of parameters and ranges that are meant to be tuned.
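A minimal usage sketch, assuming the usual TMVA::Factory / TMVA::DataLoader workflow (the factory and dataloader objects are placeholders set up beforehand):

// Sketch: book a BDT, then let the built-in optimizer scan the tunables.
auto *bdt = dynamic_cast<TMVA::MethodBDT*>(
   factory->BookMethod(dataloader, TMVA::Types::kBDT, "BDT", "NTrees=800"));
if (bdt) {
   std::map<TString, Double_t> tuned =
      bdt->OptimizeTuningParameters("ROCIntegral", "FitGA"); // defaults shown
   bdt->SetTuneParameters(tuned); // apply the optimized values
}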
Double_t Boost(std::vector< const TMVA::Event * > &, DecisionTree *dt, UInt_t cls=0)
Apply the boosting algorithm (the algorithm is selected via the option given in the constructor).
Double_t TestTreeQuality(DecisionTree *dt)
Test the tree quality in terms of misclassification rate.
std::vector< DecisionTree * > fForest
the collection of decision trees
std::vector< Bool_t > fIsLowSigCut
Double_t Bagging()
Call it boot-strapping, re-sampling or whatever you like; in the end it is nothing else but applying "random" Poisson weights to each event.
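A minimal standalone sketch of that idea (a hypothetical helper for illustration, not the internal implementation): each event receives a Poisson weight whose mean is the bagged sample fraction.

#include "TRandom3.h"
#include <cstddef>
#include <vector>

// Hypothetical helper: weight 0 effectively drops the event from
// the current tree's training sample.
std::vector<Double_t> BagWeights(std::size_t nEvents, Double_t fraction)
{
   TRandom3 rng(0);                                 // fixed seed for reproducibility
   std::vector<Double_t> w(nEvents);
   for (auto &wi : w) wi = rng.PoissonD(fraction);  // "random" Poisson weights
   return w;
}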
Double_t fErrorFraction
ntuple var: misclassification error fraction
Bool_t fRandomisedTrees
choose a random subset of possible cut variables at each node during training
Double_t fBaggedSampleFraction
relative size of bagged event sample to original sample size
Double_t fCss
Cost factor for AdaCost: true signal selected as signal.
Bool_t fUseFisherCuts
use multivariate splits using the Fisher criterion
Double_t fPruneStrength
a parameter to set the "amount" of pruning; needs to be adjusted
const std::vector< double > & GetBoostWeights() const
Int_t fNTrees
number of decision trees requested
void SetMaxDepth(Int_t d)
void UpdateTargets(std::vector< const TMVA::Event * > &, UInt_t cls=0)
Calculate residuals for all events.
Double_t fFValidationEvents
fraction of events to use for pruning
std::vector< const TMVA::Event * > fSubSample
subsample for bagged grad boost
void UpdateTargetsRegression(std::vector< const TMVA::Event * > &, Bool_t first=kFALSE)
Calculate residuals for all events and update targets for the next iteration.
Double_t GradBoostRegression(std::vector< const TMVA::Event * > &, DecisionTree *dt)
Implementation of M_TreeBoost using any loss function as described by Friedman 1999.
void WriteMonitoringHistosToFile(void) const
Here we could write some histograms created during the processing to the output file.
std::vector< Double_t > fLowBkgCut
UInt_t fMaxDepth
max depth
void SetShrinkage(Double_t s)
TString fAdaBoostR2Loss
loss type used in AdaBoostR2 (Linear, Quadratic, or Exponential)
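For reference, assuming TMVA follows Drucker (1997), the three loss variants read, with \(D = \max_i |y_i - F(x_i)|\) the largest residual in the sample,
\[
L_i^{\mathrm{lin}} = \frac{|y_i - F(x_i)|}{D},\qquad
L_i^{\mathrm{quad}} = \frac{|y_i - F(x_i)|^2}{D^2},\qquad
L_i^{\mathrm{exp}} = 1 - \exp\!\Bigl(-\frac{|y_i - F(x_i)|}{D}\Bigr).
\]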
virtual ~MethodBDT(void)
Destructor.
void AddWeightsXMLTo(void *parent) const
Write weights to XML.
Double_t GetGradBoostMVA(const TMVA::Event *e, UInt_t nTrees)
Returns MVA value: -1 for background, 1 for signal.
TString fPruneMethodS
prune method option String
Double_t fNodePurityLimit
purity limit for sig/bkg nodes
Int_t fITree
ntuple var: ith tree
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
BDT can handle classification with multiple classes and regression with one regression-target.
Double_t fShrinkage
learning rate for gradient boost
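In the usual gradient-boosting notation (Friedman), the shrinkage \(\nu\) scales each tree's contribution to the additive model,
\[
F_m(x) = F_{m-1}(x) + \nu\, h_m(x), \qquad 0 < \nu \le 1,
\]
where \(h_m\) is the response of the \(m\)-th tree; a small \(\nu\) typically requires a larger fNTrees.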
void SetNodePurityLimit(Double_t l)
TString fSepTypeS
the separation (option string) used in node splitting
Double_t RegBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
A special boosting only for Regression (not implemented).
void InitEventSample()
Initialize the event sample (i.e. reset the boost-weights... etc).
Double_t ApplyPreselectionCuts(const Event *ev)
Apply the preselection cuts before even bothering about any decision trees in the MVA evaluation.
void SetMinNodeSize(Double_t sizeInPercent)
Double_t fBoostWeight
ntuple var: boost weight
void ProcessOptions()
The option string is decoded, for available options see "DeclareOptions".
void PreProcessNegativeEventWeights()
Pre-process events with negative weights before training, e.g. by pairing them with positive-weight events and "annihilating" them (cf. fPairNegWeightsGlobal).
Bool_t fUseYesNoLeaf
use signal or background classification in leaf nodes, rather than the signal/background probability
std::vector< const TMVA::Event * > fValidationSample
the Validation events
Bool_t fAutomatic
use the user-given prune strength, or one determined automatically using a validation sample
std::vector< Double_t > fLowSigCut
Bool_t fInverseBoostNegWeights
boost events with negative weights by 1/boostweight rather than boostweight
Double_t fCtb_ss
Cost factor for AdaCost: true background selected as signal.
std::map< const TMVA::Event *, std::vector< double > > fResiduals
individual event residuals for gradient boost
UInt_t fNNodesMax
max # of nodes
void MakeClassInstantiateNode(DecisionTreeNode *n, std::ostream &fout, const TString &className) const
Recursively descends a tree and writes the node instance to the output stream.
Double_t AdaBoost(std::vector< const TMVA::Event * > &, DecisionTree *dt)
The AdaBoost implementation.
TTree * fMonitorNtuple
monitoring ntuple
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized so that all variables together have importance 1.
void SetUseNvars(Int_t n)
Bool_t fNoNegWeightsInTraining
ignore negative event weights in the training
Double_t fAdaBoostBeta
beta parameter for AdaBoost algorithm
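Assuming the standard AdaBoost reweighting quoted in the TMVA Users Guide, \(\beta\) enters as an exponent on the boost weight applied to misclassified events, with \(\mathrm{err}\) the weighted misclassification fraction (cf. fErrorFraction):
\[
w_i \;\rightarrow\; w_i \cdot \Bigl(\frac{1-\mathrm{err}}{\mathrm{err}}\Bigr)^{\beta}.
\]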
void InitGradBoost(std::vector< const TMVA::Event * > &)
Initialize targets for first tree.
void Train(void)
BDT training.
const std::vector< TMVA::DecisionTree * > & GetForest() const
void GetBaggedSubSample(std::vector< const TMVA::Event * > &)
Fills fEventSample with fBaggedSampleFraction*NEvents random training events.
const std::vector< const TMVA::Event * > & GetTrainingEvents() const
const std::vector< Float_t > & GetRegressionValues()
Get the regression value generated by the BDTs.
std::vector< Double_t > fHighSigCut
SeparationBase * fSepType
the separation used in node splitting
void ReadWeightsFromXML(void *parent)
Reads the BDT from the xml file.
void ReadWeightsFromStream(std::istream &istr)
Read the weights (BDT coefficients).
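A minimal application-side sketch, assuming the standard TMVA::Reader workflow (variable names and the weight-file path are placeholders); booking the MVA triggers the weight reading above:

#include "TMVA/Reader.h"

Float_t var1 = 0.f, var2 = 0.f;
TMVA::Reader reader("!Color:!Silent");
reader.AddVariable("var1", &var1);   // same names and order as in training
reader.AddVariable("var2", &var2);
reader.BookMVA("BDT", "dataset/weights/TMVAClassification_BDT.weights.xml");

var1 = 1.2f; var2 = -0.4f;           // set to the current event's values
Double_t mva = reader.EvaluateMVA("BDT");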
TString fNegWeightTreatment
variable that holds the option of how to treat negative event weights in training
std::vector< Bool_t > fIsHighBkgCut
void Reset(void)
Reset the method, as if it had just been instantiated (forget all training etc.).
Double_t fSigToBkgFraction
Signal to Background fraction assumed during training.
void MakeClassSpecificHeader(std::ostream &, const TString &) const
Specific class header.
Double_t fMinLinCorrForFisher
the minimum linear correlation between two variables required for use in the Fisher criterion in node splitting
UInt_t fUseNTrainEvents
number of randomly picked training events used in randomised (and bagged) trees
TString fBoostType
string specifying the boost type
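Putting the main options above together, a minimal booking sketch (assuming a Factory and DataLoader have been set up as usual; all option values are illustrative):

factory->BookMethod(dataloader, TMVA::Types::kBDT, "BDT",
   "NTrees=800:MaxDepth=3:MinNodeSize=2.5%:nCuts=20:"
   "BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:"
   "UseBaggedBoost:BaggedSampleFraction=0.6");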
void DeclareCompatibilityOptions()
Options that are used ONLY for the READER to ensure backward compatibility.
Virtual base class for all TMVA methods.
virtual void ReadWeightsFromStream(std::istream &)=0
Ranking for variables in method (implementation)
An interface to calculate the "SeparationGain" for different separation criteria used in various training algorithms.
A TTree represents a columnar dataset.
create variable transformations