#include "TMVA/MethodDT.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/DecisionTree.h"
#include "TMVA/GiniIndex.h"
#include "TMVA/CrossEntropy.h"
#include "TMVA/MisClassificationError.h"
#include "TMVA/SdivSqrtSplusB.h"

REGISTER_METHOD(DT)

ClassImp(TMVA::MethodDT)

////////////////////////////////////////////////////////////////////////////////
/// The standard constructor for just an ordinary "decision tree".

TMVA::MethodDT::MethodDT( const TString& jobName,
                          const TString& methodTitle,
                          DataSetInfo& theData,
                          const TString& theOption,
                          TDirectory* theTargetDir )
   : TMVA::MethodBase( jobName, Types::kDT, methodTitle, theData, theOption, theTargetDir )
   , fNodePurityLimit(0)
   , fPruneMethod(DecisionTree::kNoPruning)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fDeltaPruneStrength(0)
{
}
////////////////////////////////////////////////////////////////////////////////
/// Constructor for reconstructing the method from a stored weight file.

TMVA::MethodDT::MethodDT( DataSetInfo& dsi,
                          const TString& theWeightFile,
                          TDirectory* theTargetDir )
   : TMVA::MethodBase( Types::kDT, dsi, theWeightFile, theTargetDir )
   , fNodePurityLimit(0)
   , fRandomisedTrees(kFALSE)
   , fDeltaPruneStrength(0)
{
}
////////////////////////////////////////////////////////////////////////////////
/// Define the options (their key words) that can be set in the option string.

void TMVA::MethodDT::DeclareOptions()
{
   DeclareOptionRef(fRandomisedTrees, "UseRandomisedTrees",
                    "Choose at each node splitting a random set of variables and *bagging*");
   DeclareOptionRef(fUseNvars, "UseNvars",
                    "Number of variables used if randomised tree option is chosen");
   DeclareOptionRef(fUsePoissonNvars, "UsePoissonNvars",
                    "Interpret \"UseNvars\" not as a fixed number but as the mean of a Poisson distribution in each split with the RandomisedTree option");
   DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf",
                    "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
   DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit",
                    "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise");
   DeclareOptionRef(fSepTypeS="GiniIndex", "SeparationType",
                    "Separation criterion for node splitting");
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("CrossEntropy"));
   AddPreDefVal(TString("SDivSqrtSPlusB"));
   DeclareOptionRef(fMinNodeEvents=-1, "nEventsMin",
                    "Deprecated: minimum number of events required in a leaf node");
   DeclareOptionRef(fMinNodeSizeS, "MinNodeSize",
                    "Minimum percentage of training events required in a leaf node (default: Classification: 10%, Regression: 1%)");
   DeclareOptionRef(fNCuts, "nCuts",
                    "Number of steps during node cut optimisation");
   DeclareOptionRef(fPruneStrength, "PruneStrength",
                    "Pruning strength (negative value == automatic adjustment)");
   DeclareOptionRef(fPruneMethodS="NoPruning", "PruneMethod",
                    "Pruning method: NoPruning (switched off), ExpectedError or CostComplexity");
   AddPreDefVal(TString("NoPruning"));
   AddPreDefVal(TString("ExpectedError"));
   AddPreDefVal(TString("CostComplexity"));

   if (DoRegression()) {
      DeclareOptionRef(fMaxDepth=50, "MaxDepth", "Max depth of the decision tree allowed");
   }
   else {
      DeclareOptionRef(fMaxDepth=3, "MaxDepth", "Max depth of the decision tree allowed");
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Options that are used only by the reader, kept for backward compatibility.

void TMVA::MethodDT::DeclareCompatibilityOptions()
{
   MethodBase::DeclareCompatibilityOptions();
   DeclareOptionRef(fPruneBeforeBoost=kFALSE, "PruneBeforeBoost",
                    "--> removed option, only kept for reader backward compatibility");
}
////////////////////////////////////////////////////////////////////////////////
/// The option string is decoded; for available options see "DeclareOptions".

void TMVA::MethodDT::ProcessOptions()
{
   fSepTypeS.ToLower();
   if      (fSepTypeS == "misclassificationerror") fSepType = new MisClassificationError();
   else if (fSepTypeS == "giniindex")              fSepType = new GiniIndex();
   else if (fSepTypeS == "crossentropy")           fSepType = new CrossEntropy();
   else if (fSepTypeS == "sdivsqrtsplusb")         fSepType = new SdivSqrtSplusB();
   else {
      Log() << kFATAL << "<ProcessOptions> unknown Separation Index option called" << Endl;
   }
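   // For reference, with node signal purity p = S/(S+B), the separation criteria
   // above follow the standard definitions (TMVA's implementations may differ
   // by a normalisation factor):
   //   MisClassificationError : 1 - max(p, 1-p)
   //   GiniIndex              : p * (1-p)
   //   CrossEntropy           : -p*ln(p) - (1-p)*ln(1-p)
   //   SDivSqrtSPlusB         : S / sqrt(S+B)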
   fPruneMethodS.ToLower();
   if      (fPruneMethodS == "expectederror")  fPruneMethod = DecisionTree::kExpectedErrorPruning;
   else if (fPruneMethodS == "costcomplexity") fPruneMethod = DecisionTree::kCostComplexityPruning;
   else if (fPruneMethodS == "nopruning")      fPruneMethod = DecisionTree::kNoPruning;
   else {
      Log() << kFATAL << "<ProcessOptions> unknown PruneMethod option:" << fPruneMethodS << " called" << Endl;
   }

   if (fPruneStrength < 0) fAutomatic = kTRUE;
   else                    fAutomatic = kFALSE;
   if (fAutomatic && fPruneMethod == DecisionTree::kExpectedErrorPruning) {
      Log() << kFATAL
            << "Sorry, automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;
   }
   if (this->Data()->HasNegativeEventWeights()) {
      Log() << kINFO
            << " You are using a Monte Carlo sample that also has negative weights. "
            << "That should in principle be fine, as long as on average you end up with "
            << "something positive. For this you have to make sure that the minimal number "
            << "of (unweighted) events demanded for a tree node (currently you use: MinNodeSize="
            << fMinNodeSizeS
            << ", or the deprecated equivalent nEventsMin; you can set this via the "
            << "MethodDT option string when booking the "
            << "classifier) is large enough to allow for reasonable averaging! "
            << "If this does not help, you may want to try the option IgnoreNegWeightsInTraining, "
            << "which ignores events with negative weight in the training. " << Endl
            << Endl << "Note: you'll get a WARNING message during the training if that should ever happen" << Endl;
   }
   if (fRandomisedTrees) {
      Log() << kINFO << " Randomised trees should use *bagging* as *boost* method. Did you set this in the *MethodBoost*? Here I can enforce only the *no pruning*" << Endl;
      fPruneMethod = DecisionTree::kNoPruning;
   }
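   // Illustrative sketch (an assumption, not from this file): randomised trees
   // are typically combined with bagging via the MethodBoost wrapper, e.g. by
   // adding the boost options to the booking string; the option values here
   // are placeholders.
   //
   //    factory->BookMethod( TMVA::Types::kDT, "BaggedDT",
   //                         "Boost_Num=100:Boost_Type=Bagging:"
   //                         "UseRandomisedTrees=True:UseNvars=4" );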
   if (fMinNodeEvents > 0) {
      // 100.0 forces floating-point arithmetic; integer division would truncate to 0
      fMinNodeSize = 100.0 * fMinNodeEvents / Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have explicitly set *nEventsMin*, the minimum absolute number \n"
            << "of events in a leaf node. This is DEPRECATED, please use the option \n"
            << "*MinNodeSize* giving the relative number as percentage of training \n"
            << "events instead. \n"
            << "nEventsMin=" << fMinNodeEvents << " --> MinNodeSize=" << fMinNodeSize << "%"
            << Endl;
   }
   else SetMinNodeSize(fMinNodeSizeS);
}
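// For example (numbers made up for illustration): nEventsMin=50 with 10000
// training events converts to MinNodeSize = 100 * 50 / 10000 = 0.5%.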
////////////////////////////////////////////////////////////////////////////////
/// Set the minimal node size as a percentage of the training events.

void TMVA::MethodDT::SetMinNodeSize(Double_t sizeInPercent)
{
   if (sizeInPercent > 0 && sizeInPercent < 50) {
      fMinNodeSize = sizeInPercent;
   }
   else {
      Log() << kERROR << "you have demanded a minimal node size of "
            << sizeInPercent << "% of the training events, "
            << "which somehow does not make sense" << Endl;
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Set the minimal node size from a string such as "5%".

void TMVA::MethodDT::SetMinNodeSize(TString sizeInPercent)
{
   sizeInPercent.ReplaceAll("%", "");
   // IsFloat() also accepts decimal values such as "0.5", which IsAlnum() would reject
   if (sizeInPercent.IsFloat()) SetMinNodeSize(sizeInPercent.Atof());
   else {
      Log() << kERROR << "I had problems reading the option MinNodeSize, which\n"
            << "after removing a possible % sign now reads " << sizeInPercent << Endl;
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Common initialisation with defaults for the DT-Method.

void TMVA::MethodDT::Init(void)
{
   fMinNodeSizeS       = "5%";
   fDeltaPruneStrength = 0.1;
   fUseNvars           = GetNvar();
   fUsePoissonNvars    = kTRUE;

   // reference cut value to distinguish signal-like from background-like events
   SetSignalReferenceCut( 0 );
}
////////////////////////////////////////////////////////////////////////////////
/// Train the decision tree on the training sample.

void TMVA::MethodDT::Train(void)
{
   fTree = new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), 0,
                             fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth, 0 );
   fTree->SetNVars(GetNvar());
   if (fRandomisedTrees)
      Log() << kWARNING << " randomised trees do not work yet in this framework,"
            << " as I do not know how to give each tree a new random seed; now they"
            << " will all be the same, and that is not good " << Endl;
   fTree->SetAnalysisType( GetAnalysisType() );

   // collect the training events and build the tree from them
   UInt_t nevents = Data()->GetNTrainingEvents();
   std::vector<const TMVA::Event*> tmp;
   for (UInt_t ievt = 0; ievt < nevents; ievt++) {
      const Event *event = GetEvent(ievt);
      tmp.push_back(event);
   }
   fTree->BuildTree(tmp);
}
////////////////////////////////////////////////////////////////////////////////
/// Prune the decision tree if requested; returns the pruning strength used.

Double_t TMVA::MethodDT::PruneTree()
{
   // ... (determine the optimal pruning sequence `nodes` for the tree) ...

   // prune away the nodes in that sequence
   for (UInt_t i = 0; i < nodes.size(); i++)
      fTree->PruneNode(nodes[i]);

   // ...

   return fPruneStrength;
}
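// The next function scores a tree as the weighted fraction of correctly
// classified events,
//
//    Q = W_correct / (W_correct + W_wrong),
//
// where each event contributes its event weight to one of the two sums.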
////////////////////////////////////////////////////////////////////////////////
/// Test the quality of the tree in terms of misclassification.

Double_t TMVA::MethodDT::TestTreeQuality( DecisionTree *dt )
{
   Double_t SumCorrect = 0, SumWrong = 0;
   for (Long64_t ievt = 0; ievt < Data()->GetNEvents(); ievt++) {
      const Event * ev = Data()->GetEvent(ievt);
      // an event is "correct" if the tree response, thresholded at the node
      // purity limit, agrees with the true class of the event
      if ((dt->CheckEvent(ev) > dt->GetNodePurityLimit()) == DataInfo().IsSignal(ev)) SumCorrect += ev->GetWeight();
      else                                                                            SumWrong   += ev->GetWeight();
   }
   return SumCorrect / (SumCorrect + SumWrong);
}
////////////////////////////////////////////////////////////////////////////////
/// Write the tree to the XML weight node.

void TMVA::MethodDT::AddWeightsXMLTo( void* parent ) const
{
   fTree->AddXMLTo(parent);
}
////////////////////////////////////////////////////////////////////////////////
/// Read the tree back from the XML weight node.

void TMVA::MethodDT::ReadWeightsFromXML( void* wghtnode )
{
   if (fTree) delete fTree;
   fTree = new DecisionTree();
   fTree->ReadXML(wghtnode, GetTrainingTMVAVersionCode());
}
////////////////////////////////////////////////////////////////////////////////
/// Returns the MVA value of the current event.

Double_t TMVA::MethodDT::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   // cannot determine error
   NoErrorCalc(err, errUpper);

   return fTree->CheckEvent(GetEvent(), fUseYesNoLeaf);
}
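// Illustrative usage (a sketch, not from this file): evaluating a trained DT
// through the TMVA Reader. The variable names and the weight-file path are
// placeholders and must match the training setup.
//
//    TMVA::Reader reader;
//    Float_t var1, var2;
//    reader.AddVariable("var1", &var1);
//    reader.AddVariable("var2", &var2);
//    reader.BookMVA("DT", "weights/TMVAClassification_DT.weights.xml");
//    var1 = 0.3f; var2 = -1.2f;
//    Double_t mvaValue = reader.EvaluateMVA("DT");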