Logo ROOT   6.08/07
Reference Guide
MethodDT.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDT (single Decision Tree) *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Analysis of a single Decision Tree *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
16  * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel *
17  * *
18  * Copyright (c) 2005: *
19  * CERN, Switzerland *
20  * MPI-K Heidelberg, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef ROOT_TMVA_MethodDT
28 #define ROOT_TMVA_MethodDT
29 
30 //////////////////////////////////////////////////////////////////////////
31 // //
32 // MethodDT //
33 // //
34 // Analysis of Single Decision Tree //
35 // //
36 //////////////////////////////////////////////////////////////////////////
37 
38 #include <vector>
39 #ifndef ROOT_TH1
40 #include "TH1.h"
41 #endif
42 #ifndef ROOT_TH2
43 #include "TH2.h"
44 #endif
45 #ifndef ROOT_TTree
46 #include "TTree.h"
47 #endif
48 #ifndef ROOT_TMVA_MethodBase
49 #include "TMVA/MethodBase.h"
50 #endif
51 #ifndef ROOT_TMVA_DecisionTree
52 #include "TMVA/DecisionTree.h"
53 #endif
54 #ifndef ROOT_TMVA_Event
55 #include "TMVA/Event.h"
56 #endif
57 
58 namespace TMVA {
59  class MethodBoost;
60 
    // MethodDT: TMVA method built around a single DecisionTree (see fTree below);
    // it derives from MethodBase and declares the training, weight I/O, option
    // handling and ranking entry points listed here.
    // NOTE(review): the embedded listing numbers skip header lines 77, 92, 99, 101,
    // 103, 108 and 109, so some declarations are not visible in this view. The
    // cross-reference index of this page places GetPruneStrength() on line 103,
    // GetNNodesBeforePruning() on line 108 and GetNNodes() on line 109.
61  class MethodDT : public MethodBase {
62  public:
    // standard constructor: job name, method title, data set info and option string
63  MethodDT( const TString& jobName,
64  const TString& methodTitle,
65  DataSetInfo& theData,
66  const TString& theOption = "");
67 
    // constructor taking the data set info and a weight file
    // (presumably used to set the method up from previously written weights -- TODO confirm)
68  MethodDT( DataSetInfo& dsi,
69  const TString& theWeightFile);
70 
71  virtual ~MethodDT( void );
72 
    // query for a given analysis type / number of classes / number of targets
    // NOTE(review): the generated description of this function talks about "FDA",
    // which looks copied from another method -- confirm the supported types in MethodDT.cxx
73  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
74 
    // training of the method (defined in MethodDT.cxx)
75  void Train( void );
76 
78 
79  // write weights to file (XML): the weights are attached below the given parent node
80  void AddWeightsXMLTo( void* parent ) const;
81 
82  // read weights from file, either from a plain input stream or from an XML node
83  void ReadWeightsFromStream( std::istream& istr );
84  void ReadWeightsFromXML ( void* wghtnode );
85 
86  // calculate the MVA value; err and errUpper are optional output pointers (default: not requested)
87  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
88 
89  // the option handling methods: DeclareOptions defines the option key words,
    // ProcessOptions decodes the option string
90  void DeclareOptions();
91  void ProcessOptions();
93 
    // help message for this method
94  void GetHelpMessage() const;
95 
96  // ranking of input variables
97  const Ranking* CreateRanking();
98 
100 
102 
104 
    // set the minimum node size, given in percent either as a number or as a string
105  void SetMinNodeSize(Double_t sizeInPercent);
106  void SetMinNodeSize(TString sizeInPercent);
107 
110 
111  private:
112  // Init used in the various constructors (common initialisation with defaults)
113  void Init( void );
114 
115  private:
116 
117 
118  std::vector<Event*> fEventSample; // the training events
119 
120  DecisionTree* fTree; // the decision tree
121  // options for the decision tree
122  SeparationBase *fSepType; // the separation used in node splitting
123  TString fSepTypeS; // the separation (option string) used in node splitting
124  Int_t fMinNodeEvents; // min number of events in node
125  Float_t fMinNodeSize; // min percentage of training events in node
126  TString fMinNodeSizeS; // string containing min percentage of training events in node
127 
128  Int_t fNCuts; // grid used in cut applied in node splitting
129  Bool_t fUseYesNoLeaf; // use sig or bkg classification in leaf nodes or sig/bkg
130  Double_t fNodePurityLimit; // purity limit for sig/bkg nodes
131  UInt_t fMaxDepth; // max depth
132 
133 
134  Double_t fErrorFraction; // ntuple var: misclassification error fraction
135  Double_t fPruneStrength; // a parameter to set the "amount" of pruning; needs to be adjusted
136  DecisionTree::EPruneMethod fPruneMethod; // method used for pruning
137  TString fPruneMethodS; // prune method option string
138  Bool_t fAutomatic; // use user given prune strength or automatically determined one using a validation sample
139  Bool_t fRandomisedTrees; // choose a random subset of possible cut variables at each node during training
140  Int_t fUseNvars; // the number of variables used in the randomised tree splitting
141  Bool_t fUsePoissonNvars; // fUseNvars is used as a Poisson mean, and the actual value of useNvars is at each step drawn from that distribution
142  std::vector<Double_t> fVariableImportance; // the relative importance of the different variables
143 
144  Double_t fDeltaPruneStrength; // step size in pruning, is adjusted according to experience of previous trees
145  // debugging flags
146  static const Int_t fgDebugLevel = 0; // debug level determining some printout/control plots etc.
147 
148 
149  Bool_t fPruneBeforeBoost; // ancient variable, only needed for "CompatibilityOptions"
150 
151  ClassDef(MethodDT,0); // Analysis of Decision Trees
152 
153  };
154 }
155 
156 #endif
Bool_t fUsePoissonNvars
Definition: MethodDT.h:141
float Float_t
Definition: RtypesCore.h:53
std::vector< Event * > fEventSample
Definition: MethodDT.h:118
void Init(void)
common initialisation with defaults for the DT-Method
Definition: MethodDT.cxx:340
UInt_t GetNNodes() const
Definition: BinaryTree.h:92
TString fPruneMethodS
Definition: MethodDT.h:137
DecisionTree::EPruneMethod fPruneMethod
Definition: MethodDT.h:136
EAnalysisType
Definition: Types.h:129
Basic string class.
Definition: TString.h:137
static const Int_t fgDebugLevel
Definition: MethodDT.h:146
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodDT.cxx:180
Double_t fNodePurityLimit
Definition: MethodDT.h:130
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out...
Definition: MethodDT.cxx:403
Bool_t fPruneBeforeBoost
Definition: MethodDT.h:149
void SetMinNodeSize(Double_t sizeInPercent)
Definition: MethodDT.cxx:315
Double_t fPruneStrength
Definition: MethodDT.h:135
Int_t fUseNvars
Definition: MethodDT.h:140
#define ClassDef(name, id)
Definition: Rtypes.h:254
Bool_t fAutomatic
Definition: MethodDT.h:138
UInt_t fMaxDepth
Definition: MethodDT.h:131
void ReadWeightsFromStream(std::istream &istr)
Definition: MethodDT.cxx:538
Int_t fMinNodeEvents
Definition: MethodDT.h:124
Double_t GetPruneStrength()
Definition: MethodDT.h:103
void DeclareOptions()
define the options (their key words) that can be set in the option string UseRandomisedTrees choose a...
Definition: MethodDT.cxx:209
Int_t GetNNodes()
Definition: MethodDT.h:109
Double_t fDeltaPruneStrength
Definition: MethodDT.h:144
void AddWeightsXMLTo(void *parent) const
Definition: MethodDT.cxx:520
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodDT.cxx:251
Double_t fErrorFraction
Definition: MethodDT.h:134
TString fSepTypeS
Definition: MethodDT.h:123
Int_t GetNNodesBeforePruning()
Definition: DecisionTree.h:188
unsigned int UInt_t
Definition: RtypesCore.h:42
DecisionTree * fTree
Definition: MethodDT.h:120
Float_t fMinNodeSize
Definition: MethodDT.h:125
Double_t TestTreeQuality(DecisionTree *dt)
Definition: MethodDT.cxx:503
Int_t fNCuts
Definition: MethodDT.h:128
double Double_t
Definition: RtypesCore.h:55
Bool_t fRandomisedTrees
Definition: MethodDT.h:139
int type
Definition: TGX11.cxx:120
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value
Definition: MethodDT.cxx:548
TString fMinNodeSizeS
Definition: MethodDT.h:126
void Train(void)
Definition: MethodDT.cxx:372
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:108
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
Definition: MethodDT.cxx:129
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodDT.cxx:239
Abstract ClassifierFactory template that handles arbitrary types.
virtual ~MethodDT(void)
destructor
Definition: MethodDT.cxx:365
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDT.cxx:528
Bool_t fUseYesNoLeaf
Definition: MethodDT.h:129
virtual void ReadWeightsFromStream(std::istream &)=0
void GetHelpMessage() const
Definition: MethodDT.cxx:558
std::vector< Double_t > fVariableImportance
Definition: MethodDT.h:142
const Ranking * CreateRanking()
Definition: MethodDT.cxx:563
SeparationBase * fSepType
Definition: MethodDT.h:122