140   , fNodePurityLimit(0)
 
  146   , fRandomisedTrees(
kFALSE)
 
  148   , fUsePoissonNvars(0)  
 
  149   , fDeltaPruneStrength(0)
 
  158                          const TString& theWeightFile) :
 
  166   , fNodePurityLimit(0)
 
  172   , fRandomisedTrees(
kFALSE)
 
  174   , fDeltaPruneStrength(0)
 
  216   DeclareOptionRef(fRandomisedTrees,
"UseRandomisedTrees",
"Choose at each node splitting a random set of variables and *bagging*");
 
  217   DeclareOptionRef(fUseNvars,
"UseNvars",
"Number of variables used if randomised Tree option is chosen");
 
  218   DeclareOptionRef(fUsePoissonNvars,
"UsePoissonNvars", 
"Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
 
  219   DeclareOptionRef(fUseYesNoLeaf=
kTRUE, 
"UseYesNoLeaf",
 
  220                    "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
 
  221   DeclareOptionRef(fNodePurityLimit=0.5, 
"NodePurityLimit", 
"In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
 
  222   DeclareOptionRef(fSepTypeS=
"GiniIndex", 
"SeparationType", 
"Separation criterion for node splitting");
 
  223   AddPreDefVal(
TString(
"MisClassificationError"));
 
  224   AddPreDefVal(
TString(
"GiniIndex"));
 
  225   AddPreDefVal(
TString(
"CrossEntropy"));
 
  226   AddPreDefVal(
TString(
"SDivSqrtSPlusB"));
 
  227   DeclareOptionRef(fMinNodeEvents=-1, 
"nEventsMin", 
"deprecated !!! Minimum number of events required in a leaf node");
 
  228   DeclareOptionRef(fMinNodeSizeS, 
"MinNodeSize", 
"Minimum percentage of training events required in a leaf node (default: Classification: 10%, Regression: 1%)");
 
  229   DeclareOptionRef(fNCuts, 
"nCuts", 
"Number of steps during node cut optimisation");
 
  230   DeclareOptionRef(fPruneStrength, 
"PruneStrength", 
"Pruning strength (negative value == automatic adjustment)");
 
  231   DeclareOptionRef(fPruneMethodS=
"NoPruning", 
"PruneMethod", 
"Pruning method: NoPruning (switched off), ExpectedError or CostComplexity");
 
  233   AddPreDefVal(
TString(
"NoPruning"));
 
  234   AddPreDefVal(
TString(
"ExpectedError"));
 
  235   AddPreDefVal(
TString(
"CostComplexity"));
 
  237   if (DoRegression()) {
 
  238      DeclareOptionRef(fMaxDepth=50,
"MaxDepth",
"Max depth of the decision tree allowed");
 
  240      DeclareOptionRef(fMaxDepth=3,
"MaxDepth",
"Max depth of the decision tree allowed");
 
  251   DeclareOptionRef(fPruneBeforeBoost=
kFALSE, 
"PruneBeforeBoost",
 
  252                    "--> removed option .. only kept for reader backward compatibility");
 
  262   else if (fSepTypeS == 
"giniindex")              fSepType = 
new GiniIndex();
 
  263   else if (fSepTypeS == 
"crossentropy")           fSepType = 
new CrossEntropy();
 
  264   else if (fSepTypeS == 
"sdivsqrtsplusb")         fSepType = 
new SdivSqrtSplusB();
 
  266      Log() << kINFO << GetOptions() << 
Endl;
 
  267      Log() << kFATAL << 
"<ProcessOptions> unknown Separation Index option called" << 
Endl;
 
  272   fPruneMethodS.ToLower();
 
  277      Log() << kINFO << GetOptions() << 
Endl;
 
  278      Log() << kFATAL << 
"<ProcessOptions> unknown PruneMethod option:" << fPruneMethodS <<
" called" << 
Endl;
 
  281   if (fPruneStrength < 0) fAutomatic = 
kTRUE;
 
  285            <<  
"Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << 
Endl;
 
  289   if (this->Data()->HasNegativeEventWeights()){
 
  290      Log() << kINFO << 
" You are using a Monte Carlo that has also negative weights. " 
  291            << 
"That should in principle be fine as long as on average you end up with " 
  292            << 
"something positive. For this you have to make sure that the minimal number " 
  293            << 
"of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize=" 
  295            <<
", (or the deprecated equivalent nEventsMin) you can set this via the " 
  296            <<
"MethodDT option string when booking the " 
  297            << 
"classifier) is large enough to allow for reasonable averaging!!! " 
  298            << 
" If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining  " 
  299            << 
"which ignores events with negative weight in the training. " << 
Endl 
  300            << 
Endl << 
"Note: You'll get a WARNING message during the training if that should ever happen" << 
Endl;
 
  303   if (fRandomisedTrees){
 
  304      Log() << kINFO << 
" Randomised trees should use *bagging* as *boost* method. Did you set this in the *MethodBoost* ? . Here I can enforce only the *no pruning*" << 
Endl;
 
  309   if (fMinNodeEvents > 0){
 
  310      fMinNodeSize = fMinNodeEvents / Data()->GetNTrainingEvents() * 100;
 
  311      Log() << kWARNING << 
"You have explicitly set *nEventsMin*, the min absolute number \n" 
  312            << 
"of events in a leaf node. This is DEPRECATED, please use the option \n" 
  313            << 
"*MinNodeSize* giving the relative number as percentage of training \n" 
  314            << 
"events instead. \n" 
  315            << 
"nEventsMin="<<fMinNodeEvents<< 
"--> MinNodeSize="<<fMinNodeSize<<
"%" 
  318      SetMinNodeSize(fMinNodeSizeS);
 
  323   if (sizeInPercent > 0 && sizeInPercent < 50){
 
  324      fMinNodeSize=sizeInPercent;
 
  327      Log() << kERROR << 
"you have demanded a minimal node size of " 
  328            << sizeInPercent << 
"% of the training events.. \n" 
  329            << 
" that somehow does not make sense "<<
Endl;
 
  335   if (sizeInPercent.
IsAlnum()) SetMinNodeSize(sizeInPercent.
Atof());
 
  337      Log() << kERROR << 
"I had problems reading the option MinNodeEvents, which\n" 
  338            << 
"after removing a possible % sign now reads " << sizeInPercent << 
Endl;
 
  349   fMinNodeSizeS   = 
"5%";
 
  353   fDeltaPruneStrength=0.1;
 
  355   fUseNvars       = GetNvar();
 
  356   fUsePoissonNvars = 
kTRUE;
 
  359   SetSignalReferenceCut( 0 );
 
  380   fTree = 
new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), 0,
 
  381                             fRandomisedTrees, fUseNvars, fUsePoissonNvars,fMaxDepth,0 );
 
  382   fTree->SetNVars(GetNvar());
 
  383   if (fRandomisedTrees) 
Log()<<kWARNING<<
" randomised Trees do not work yet in this framework," 
  384                              << 
" as I do not know how to give each tree a new random seed, now they" 
  385                              << 
" will be all the same and that is not good " << 
Endl;
 
  386   fTree->SetAnalysisType( GetAnalysisType() );
 
  390   UInt_t nevents = Data()->GetNTrainingEvents();
 
  391   std::vector<const TMVA::Event*> tmp;
 
  392   for (
Long64_t ievt=0; ievt<nevents; ievt++) {
 
  393      const Event *
event = GetEvent(ievt);
 
  394      tmp.push_back(event);
 
  396   fTree->BuildTree(tmp);
 
  418      for(
UInt_t i = 0; i < nodes.size(); i++)
 
  419         fTree->PruneNode(nodes[i]);
 
  503   return fPruneStrength;
 
  513   for (
Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++)
 
  515         const Event * ev = Data()->GetEvent(ievt);
 
  520   return  SumCorrect / (SumCorrect + SumWrong);
 
  527   fTree->AddXMLTo(parent);
 
  538   fTree->ReadXML(wghtnode,GetTrainingTMVAVersionCode());
 
  556   NoErrorCalc(err, errUpper);
 
  558   return fTree->CheckEvent(GetEvent(),fUseYesNoLeaf);
 
#define REGISTER_METHOD(CLASS)
for example
A helper class to prune a decision tree using the Cost Complexity method (see Classification and Regr...
void SetPruneStrength(Float_t alpha=-1.0)
void Optimize()
determine the pruning sequence
std::vector< TMVA::DecisionTreeNode * > GetOptimalPruneSequence() const
return the prune strength (=alpha) corresponding to the prune sequence
Float_t GetOptimalPruneStrength() const
Implementation of the CrossEntropy as separation criterion.
Class that contains all the data information.
Implementation of a Decision Tree.
Double_t GetNodePurityLimit() const
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Implementation of the GiniIndex as separation criterion.
Virtual base Class for all MVA method.
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Analysis of Boosted Decision Trees.
virtual ~MethodDT(void)
destructor
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
Double_t TestTreeQuality(DecisionTree *dt)
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
MethodDT can handle classification with 2 classes and regression with one regression-target.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value
const Ranking * CreateRanking()
void ReadWeightsFromXML(void *wghtnode)
void GetHelpMessage() const
void AddWeightsXMLTo(void *parent) const
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out,...
void ReadWeightsFromStream(std::istream &istr)
void DeclareOptions()
Define the options (their key words) that can be set in the option string.
void Init(void)
common initialisation with defaults for the DT-Method
void SetMinNodeSize(Double_t sizeInPercent)
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Implementation of the MisClassificationError as separation criterion.
Ranking for variables in method (implementation)
Implementation of the SdivSqrtSplusB as separation criterion.
Singleton class for Global types used by TMVA.
Double_t Atof() const
Return floating-point value contained in string.
TString & ReplaceAll(const TString &s1, const TString &s2)
Bool_t IsAlnum() const
Returns true if all characters in string are alphanumeric.
Abstract ClassifierFactory template that handles arbitrary types.
MsgLogger & Endl(MsgLogger &ml)