137 , fNodePurityLimit(0)
143 , fRandomisedTrees(
kFALSE)
145 , fUsePoissonNvars(0)
146 , fDeltaPruneStrength(0)
148 fPruneBeforeBoost =
kFALSE;
163 , fNodePurityLimit(0)
169 , fRandomisedTrees(
kFALSE)
171 , fDeltaPruneStrength(0)
213 DeclareOptionRef(fRandomisedTrees,
"UseRandomisedTrees",
"Choose at each node splitting a random set of variables and *bagging*");
214 DeclareOptionRef(fUseNvars,
"UseNvars",
"Number of variables used if randomised Tree option is chosen");
215 DeclareOptionRef(fUsePoissonNvars,
"UsePoissonNvars",
"Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
216 DeclareOptionRef(fUseYesNoLeaf=
kTRUE,
"UseYesNoLeaf",
217 "Use Sig or Bkg node type or the ratio S/B as classification in the leaf node");
218 DeclareOptionRef(fNodePurityLimit=0.5,
"NodePurityLimit",
"In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");
219 DeclareOptionRef(fSepTypeS=
"GiniIndex",
"SeparationType",
"Separation criterion for node splitting");
220 AddPreDefVal(
TString(
"MisClassificationError"));
221 AddPreDefVal(
TString(
"GiniIndex"));
222 AddPreDefVal(
TString(
"CrossEntropy"));
223 AddPreDefVal(
TString(
"SDivSqrtSPlusB"));
224 DeclareOptionRef(fMinNodeEvents=-1,
"nEventsMin",
"deprecated !!! Minimum number of events required in a leaf node");
225 DeclareOptionRef(fMinNodeSizeS,
"MinNodeSize",
"Minimum percentage of training events required in a leaf node (default: Classification: 10%, Regression: 1%)");
226 DeclareOptionRef(fNCuts,
"nCuts",
"Number of steps during node cut optimisation");
227 DeclareOptionRef(fPruneStrength,
"PruneStrength",
"Pruning strength (negative value == automatic adjustment)");
228 DeclareOptionRef(fPruneMethodS=
"NoPruning",
"PruneMethod",
"Pruning method: NoPruning (switched off), ExpectedError or CostComplexity");
230 AddPreDefVal(
TString(
"NoPruning"));
231 AddPreDefVal(
TString(
"ExpectedError"));
232 AddPreDefVal(
TString(
"CostComplexity"));
234 if (DoRegression()) {
235 DeclareOptionRef(fMaxDepth=50,
"MaxDepth",
"Max depth of the decision tree allowed");
237 DeclareOptionRef(fMaxDepth=3,
"MaxDepth",
"Max depth of the decision tree allowed");
248 DeclareOptionRef(fPruneBeforeBoost=
kFALSE,
"PruneBeforeBoost",
249 "--> removed option .. only kept for reader backward compatibility");
259 else if (fSepTypeS ==
"giniindex") fSepType =
new GiniIndex();
260 else if (fSepTypeS ==
"crossentropy") fSepType =
new CrossEntropy();
261 else if (fSepTypeS ==
"sdivsqrtsplusb") fSepType =
new SdivSqrtSplusB();
263 Log() << kINFO << GetOptions() <<
Endl;
264 Log() << kFATAL <<
"<ProcessOptions> unknown Separation Index option called" <<
Endl;
269 fPruneMethodS.ToLower();
274 Log() << kINFO << GetOptions() <<
Endl;
275 Log() << kFATAL <<
"<ProcessOptions> unknown PruneMethod option:" << fPruneMethodS <<
" called" <<
Endl;
278 if (fPruneStrength < 0) fAutomatic =
kTRUE;
282 <<
"Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" <<
Endl;
286 if (this->Data()->HasNegativeEventWeights()){
287 Log() << kINFO <<
" You are using a Monte Carlo that has also negative weights. "
288 <<
"That should in principle be fine as long as on average you end up with "
289 <<
"something positive. For this you have to make sure that the minimal number "
290 <<
"of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
292 <<
", (or the deprecated equivalent nEventsMin) you can set this via the "
293 <<
"MethodDT option string when booking the "
294 <<
"classifier) is large enough to allow for reasonable averaging!!! "
295 <<
" If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
296 <<
"which ignores events with negative weight in the training. " <<
Endl
297 <<
Endl <<
"Note: You'll get a WARNING message during the training if that should ever happen" <<
Endl;
300 if (fRandomisedTrees){
301 Log() << kINFO <<
" Randomised trees should use *bagging* as *boost* method. Did you set this in the *MethodBoost* ? . Here I can enforce only the *no pruning*" <<
Endl;
306 if (fMinNodeEvents > 0){
307 fMinNodeSize = fMinNodeEvents / Data()->GetNTrainingEvents() * 100;
308 Log() << kWARNING <<
"You have explicitly set *nEventsMin*, the min absolute number \n"
309 <<
"of events in a leaf node. This is DEPRECATED, please use the option \n"
310 <<
"*MinNodeSize* giving the relative number as percentage of training \n"
311 <<
"events instead. \n"
312 <<
"nEventsMin="<<fMinNodeEvents<<
"--> MinNodeSize="<<fMinNodeSize<<
"%"
315 SetMinNodeSize(fMinNodeSizeS);
324 Log() << kERROR <<
"you have demanded a minimal node size of "
326 <<
" that somehow does not make sense "<<
Endl;
334 Log() << kERROR <<
"I had problems reading the option MinNodeEvents, which\n"
346 fMinNodeSizeS =
"5%";
350 fDeltaPruneStrength=0.1;
352 fUseNvars = GetNvar();
353 fUsePoissonNvars =
kTRUE;
356 SetSignalReferenceCut( 0 );
377 fTree =
new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), 0,
378 fRandomisedTrees, fUseNvars, fUsePoissonNvars,fMaxDepth,0 );
379 fTree->SetNVars(GetNvar());
380 if (fRandomisedTrees) Log()<<kWARNING<<
" randomised Trees do not work yet in this framework,"
381 <<
" as I do not know how to give each tree a new random seed, now they"
382 <<
" will be all the same and that is not good " <<
Endl;
383 fTree->SetAnalysisType( GetAnalysisType() );
388 std::vector<const TMVA::Event*> tmp;
391 tmp.push_back(event);
393 fTree->BuildTree(tmp);
413 std::vector<DecisionTreeNode*> nodes =
pruneTool->GetOptimalPruneSequence();
414 fPruneStrength =
pruneTool->GetOptimalPruneStrength();
415 for(
UInt_t i = 0; i < nodes.size(); i++)
416 fTree->PruneNode(nodes[i]);
496 fTree->SetPruneStrength(fPruneStrength);
500 return fPruneStrength;
513 if ((
dt->CheckEvent(
ev) >
dt->GetNodePurityLimit() ) == DataInfo().IsSignal(
ev))
SumCorrect+=
ev->GetWeight();
524 fTree->AddXMLTo(parent);
535 fTree->ReadXML(
wghtnode,GetTrainingTMVAVersionCode());
555 return fTree->CheckEvent(GetEvent(),fUseYesNoLeaf);
#define REGISTER_METHOD(CLASS)
for example
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
A helper class to prune a decision tree using the Cost Complexity method (see Classification and Regr...
Implementation of the CrossEntropy as separation criterion.
Class that contains all the data information.
static void SetIsTraining(bool on)
Implementation of a Decision Tree.
Implementation of the GiniIndex as separation criterion.
Virtual base Class for all MVA method.
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Analysis of Boosted Decision Trees.
virtual ~MethodDT(void)
destructor
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
void DeclareOptions() override
Define the options (their key words) that can be set in the option string.
void ProcessOptions() override
the option string is decoded, for available options see "DeclareOptions"
void Init(void) override
common initialisation with defaults for the DT-Method
Double_t TestTreeQuality(DecisionTree *dt)
void ReadWeightsFromXML(void *wghtnode) override
const Ranking * CreateRanking() override
void GetHelpMessage() const override
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
returns MVA value
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out,...
void DeclareCompatibilityOptions() override
options that are used ONLY for the READER to ensure backward compatibility
void ReadWeightsFromStream(std::istream &istr) override
void AddWeightsXMLTo(void *parent) const override
void Train(void) override
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
DT can handle classification with 2 classes and regression with one regression-target.
Bool_t fPruneBeforeBoost
ancient variable, only needed for "CompatibilityOptions"
void SetMinNodeSize(Double_t sizeInPercent)
Implementation of the MisClassificationError as separation criterion.
Ranking for variables in method (implementation)
Implementation of the SdivSqrtSplusB as separation criterion.
Singleton class for Global types used by TMVA.
@ kValidation
these are placeholders... currently not used, but could be moved "forward" if
create variable transformations
MsgLogger & Endl(MsgLogger &ml)