Logo ROOT   6.12/07
Reference Guide
MethodDT.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDT (Single Decision Tree) *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Analysis of Single Decision Tree *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
16  * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel *
17  * *
18  * Copyright (c) 2005: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef ROOT_TMVA_MethodDT
28 #define ROOT_TMVA_MethodDT
29 
30 //////////////////////////////////////////////////////////////////////////
31 // //
32 // MethodDT //
33 // //
34 // Analysis of Single Decision Tree //
35 // //
36 //////////////////////////////////////////////////////////////////////////
37 
38 #include <vector>
39 #include "TH1.h"
40 #include "TH2.h"
41 #include "TTree.h"
42 #include "TMVA/MethodBase.h"
43 #include "TMVA/DecisionTree.h"
44 #include "TMVA/Event.h"
45 
46 namespace TMVA {
47  class MethodBoost; // forward declaration only; not otherwise used in the visible part of this header
48 
49  class MethodDT : public MethodBase { // MVA method holding one DecisionTree (see fTree) and its options
50  public:
51  MethodDT( const TString& jobName, // standard constructor
52  const TString& methodTitle,
53  DataSetInfo& theData,
54  const TString& theOption = "");
55 
56  MethodDT( DataSetInfo& dsi, // constructor taking a weight file name
57  const TString& theWeightFile);
58 
59  virtual ~MethodDT( void );
60 
61  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
62 
63  void Train( void );
64 
66 
67  // write weights to the XML node passed as parent
68  void AddWeightsXMLTo( void* parent ) const;
69 
70  // read weights, either from an input stream or from an XML node
71  void ReadWeightsFromStream( std::istream& istr );
72  void ReadWeightsFromXML ( void* wghtnode );
73 
74  // calculate the MVA value; err and errUpper are optional (default to 0)
75  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
76 
77  // the option handling methods: declare the option key words, then decode the option string
78  void DeclareOptions();
79  void ProcessOptions();
81 
82  void GetHelpMessage() const;
83 
84  // ranking of input variables
85  const Ranking* CreateRanking();
86 
88 
90 
92 
93  void SetMinNodeSize(Double_t sizeInPercent); // min node size given as a number, in percent
94  void SetMinNodeSize(TString sizeInPercent); // min node size given as a string, in percent
95 
98 
99  private:
100  // common initialisation, used in the various constructors
101  void Init( void );
102 
103  private:
104 
105 
106  std::vector<Event*> fEventSample; // the training events
107 
108  DecisionTree* fTree; // the decision tree
109  // options for the decision tree
110  SeparationBase *fSepType; // the separation used in node splitting
111  TString fSepTypeS; // the separation (option string) used in node splitting
112  Int_t fMinNodeEvents; // min number of events in node
113  Float_t fMinNodeSize; // min percentage of training events in node
114  TString fMinNodeSizeS; // string containing min percentage of training events in node
115 
116  Int_t fNCuts; // grid used in cut applied in node splitting
117  Bool_t fUseYesNoLeaf; // use sig or bkg classification in leaf nodes or sig/bkg
118  Double_t fNodePurityLimit; // purity limit for sig/bkg nodes
119  UInt_t fMaxDepth; // max depth
120 
121 
122  Double_t fErrorFraction; // ntuple var: misclassification error fraction
123  Double_t fPruneStrength; // a parameter to set the "amount" of pruning; needs to be adjusted
124  DecisionTree::EPruneMethod fPruneMethod; // method used for pruning
125  TString fPruneMethodS; // prune method option string
126  Bool_t fAutomatic; // use user given prune strength or automatically determined one using a validation sample
127  Bool_t fRandomisedTrees; // choose a random subset of possible cut variables at each node during training
128  Int_t fUseNvars; // the number of variables used in the randomised tree splitting
129  Bool_t fUsePoissonNvars; // fUseNvars is used as a Poisson mean, and the actual value of useNvars is at each step drawn from that distribution
130  std::vector<Double_t> fVariableImportance; // the relative importance of the different variables
131 
132  Double_t fDeltaPruneStrength; // step size in pruning, is adjusted according to experience of previous trees
133  // debugging flags
134  static const Int_t fgDebugLevel = 0; // debug level determining some printout/control plots etc.
135 
136 
137  Bool_t fPruneBeforeBoost; // ancient variable, only needed for "CompatibilityOptions"
138 
139  ClassDef(MethodDT,0); // Analysis of Decision Trees
140 
141  };
142 }
143 
144 #endif
Bool_t fUsePoissonNvars
Definition: MethodDT.h:129
float Float_t
Definition: RtypesCore.h:53
std::vector< Event * > fEventSample
Definition: MethodDT.h:106
void Init(void)
common initialisation with defaults for the DT-Method
Definition: MethodDT.cxx:345
UInt_t GetNNodes() const
Definition: BinaryTree.h:86
TString fPruneMethodS
Definition: MethodDT.h:125
DecisionTree::EPruneMethod fPruneMethod
Definition: MethodDT.h:124
EAnalysisType
Definition: Types.h:125
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
Basic string class.
Definition: TString.h:125
Ranking for variables in method (implementation)
Definition: Ranking.h:48
static const Int_t fgDebugLevel
Definition: MethodDT.h:134
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodDT.cxx:182
Double_t fNodePurityLimit
Definition: MethodDT.h:118
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out...
Definition: MethodDT.cxx:408
Bool_t fPruneBeforeBoost
Definition: MethodDT.h:137
void SetMinNodeSize(Double_t sizeInPercent)
Definition: MethodDT.cxx:322
Double_t fPruneStrength
Definition: MethodDT.h:123
Int_t fUseNvars
Definition: MethodDT.h:128
#define ClassDef(name, id)
Definition: Rtypes.h:320
Bool_t fAutomatic
Definition: MethodDT.h:126
UInt_t fMaxDepth
Definition: MethodDT.h:119
void ReadWeightsFromStream(std::istream &istr)
Definition: MethodDT.cxx:543
Int_t fMinNodeEvents
Definition: MethodDT.h:112
Double_t GetPruneStrength()
Definition: MethodDT.h:91
void DeclareOptions()
Define the options (their key words) that can be set in the option string.
Definition: MethodDT.cxx:214
Int_t GetNNodes()
Definition: MethodDT.h:97
Double_t fDeltaPruneStrength
Definition: MethodDT.h:132
Class that contains all the data information.
Definition: DataSetInfo.h:60
void AddWeightsXMLTo(void *parent) const
Definition: MethodDT.cxx:525
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodDT.cxx:258
Double_t fErrorFraction
Definition: MethodDT.h:122
TString fSepTypeS
Definition: MethodDT.h:111
Int_t GetNNodesBeforePruning()
Definition: DecisionTree.h:174
Implementation of a Decision Tree.
Definition: DecisionTree.h:59
unsigned int UInt_t
Definition: RtypesCore.h:42
DecisionTree * fTree
Definition: MethodDT.h:108
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
Float_t fMinNodeSize
Definition: MethodDT.h:113
Double_t TestTreeQuality(DecisionTree *dt)
Definition: MethodDT.cxx:508
Int_t fNCuts
Definition: MethodDT.h:116
double Double_t
Definition: RtypesCore.h:55
Bool_t fRandomisedTrees
Definition: MethodDT.h:127
int type
Definition: TGX11.cxx:120
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value
Definition: MethodDT.cxx:553
TString fMinNodeSizeS
Definition: MethodDT.h:114
void Train(void)
Definition: MethodDT.cxx:377
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
Definition: MethodDT.cxx:129
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
Definition: MethodDT.cxx:247
Abstract ClassifierFactory template that handles arbitrary types.
virtual ~MethodDT(void)
destructor
Definition: MethodDT.cxx:370
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDT.cxx:533
Bool_t fUseYesNoLeaf
Definition: MethodDT.h:117
virtual void ReadWeightsFromStream(std::istream &)=0
void GetHelpMessage() const
Definition: MethodDT.cxx:563
std::vector< Double_t > fVariableImportance
Definition: MethodDT.h:130
Analysis of Boosted Decision Trees.
Definition: MethodDT.h:49
const Ranking * CreateRanking()
Definition: MethodDT.cxx:568
SeparationBase * fSepType
Definition: MethodDT.h:110