Logo ROOT  
Reference Guide
MethodDT.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodDT (Single Decision Tree) *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Analysis of a Single Decision Tree *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
16 * Or Cohen <orcohenor@gmail.com> - Weizmann Inst., Israel *
17 * *
18 * Copyright (c) 2005: *
19 * CERN, Switzerland *
20 * MPI-K Heidelberg, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27#ifndef ROOT_TMVA_MethodDT
28#define ROOT_TMVA_MethodDT
29
30//////////////////////////////////////////////////////////////////////////
31// //
32// MethodDT //
33// //
34// Analysis of Single Decision Tree //
35// //
36//////////////////////////////////////////////////////////////////////////
37
38#include <vector>
39#include "TH1.h"
40#include "TH2.h"
41#include "TTree.h"
42#include "TMVA/MethodBase.h"
43#include "TMVA/DecisionTree.h"
44#include "TMVA/Event.h"
45
46namespace TMVA {
47 class MethodBoost;
48
49 class MethodDT : public MethodBase {
50 public:
51 MethodDT( const TString& jobName,
52 const TString& methodTitle,
53 DataSetInfo& theData,
54 const TString& theOption = "");
55
57 const TString& theWeightFile);
58
59 virtual ~MethodDT( void );
60
61 virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
62
63 void Train( void );
64
66
67 // write weights to file
68 void AddWeightsXMLTo( void* parent ) const;
69
70 // read weights from file
71 void ReadWeightsFromStream( std::istream& istr );
72 void ReadWeightsFromXML ( void* wghtnode );
73
74 // calculate the MVA value
75 Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
76
77 // the option handling methods
78 void DeclareOptions();
79 void ProcessOptions();
81
82 void GetHelpMessage() const;
83
84 // ranking of input variables
85 const Ranking* CreateRanking();
86
88
90
92
93 void SetMinNodeSize(Double_t sizeInPercent);
94 void SetMinNodeSize(TString sizeInPercent);
95
98
99 private:
100 // Init used in the various constructors
101 void Init( void );
102
103 private:
104
105
106 std::vector<Event*> fEventSample; // the training events
107
108 DecisionTree* fTree; // the decision tree
109 //options for the decision Tree
110 SeparationBase *fSepType; // the separation used in node splitting
111 TString fSepTypeS; // the separation (option string) used in node splitting
112 Int_t fMinNodeEvents; // min number of events in node
113 Float_t fMinNodeSize; // min percentage of training events in node
114 TString fMinNodeSizeS; // string containing min percentage of training events in node
115
116 Int_t fNCuts; // grid used in cut applied in node splitting
117 Bool_t fUseYesNoLeaf; // use sig or bkg classification in leaf nodes or sig/bkg
118 Double_t fNodePurityLimit; // purity limit for sig/bkg nodes
119 UInt_t fMaxDepth; // max depth
120
121
122 Double_t fErrorFraction; // ntuple var: misclassification error fraction
123 Double_t fPruneStrength; // a parameter to set the "amount" of pruning; needs to be adjusted
124 DecisionTree::EPruneMethod fPruneMethod; // method used for pruning
125 TString fPruneMethodS; // prune method option String
126 Bool_t fAutomatic; // use user given prune strength or automatically determined one using a validation sample
127 Bool_t fRandomisedTrees; // choose a random subset of possible cut variables at each node during training
128 Int_t fUseNvars; // the number of variables used in the randomised tree splitting
129 Bool_t fUsePoissonNvars; // fUseNvars is used as a Poisson mean, and the actual value of useNvars is drawn from that distribution at each step
130 std::vector<Double_t> fVariableImportance; // the relative importance of the different variables
131
132 Double_t fDeltaPruneStrength; // step size in pruning, is adjusted according to experience of previous trees
133 // debugging flags
134 static const Int_t fgDebugLevel = 0; // debug level determining some printout/control plots etc.
135
136
137 Bool_t fPruneBeforeBoost; //ancient variable, only needed for "CompatibilityOptions"
138
139 ClassDef(MethodDT,0); // Analysis of Decision Trees
140
141 };
142}
143
144#endif
double Double_t
Definition: RtypesCore.h:57
float Float_t
Definition: RtypesCore.h:55
#define ClassDef(name, id)
Definition: Rtypes.h:322
int type
Definition: TGX11.cxx:120
UInt_t GetNNodes() const
Definition: BinaryTree.h:86
Class that contains all the data information.
Definition: DataSetInfo.h:60
Implementation of a Decision Tree.
Definition: DecisionTree.h:64
Int_t GetNNodesBeforePruning()
Definition: DecisionTree.h:179
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
virtual void ReadWeightsFromStream(std::istream &)=0
Analysis of a Single Decision Tree.
Definition: MethodDT.h:49
UInt_t fMaxDepth
Definition: MethodDT.h:119
Bool_t fAutomatic
Definition: MethodDT.h:126
Int_t GetNNodes()
Definition: MethodDT.h:97
Float_t fMinNodeSize
Definition: MethodDT.h:113
virtual ~MethodDT(void)
destructor
Definition: MethodDT.cxx:369
MethodDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for just an ordinary "decision tree"
Definition: MethodDT.cxx:128
Bool_t fUsePoissonNvars
Definition: MethodDT.h:129
Int_t fUseNvars
Definition: MethodDT.h:128
Double_t TestTreeQuality(DecisionTree *dt)
Definition: MethodDT.cxx:507
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
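DT can handle classification with 2 classes and regression with one regression-target (wording appears copied from the FDA method; verify the supported analysis types against MethodDT.cxx).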
Definition: MethodDT.cxx:181
SeparationBase * fSepType
Definition: MethodDT.h:110
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value
Definition: MethodDT.cxx:552
DecisionTree::EPruneMethod fPruneMethod
Definition: MethodDT.h:124
Double_t fErrorFraction
Definition: MethodDT.h:122
static const Int_t fgDebugLevel
Definition: MethodDT.h:134
void Train(void)
Definition: MethodDT.cxx:376
Double_t fDeltaPruneStrength
Definition: MethodDT.h:132
Bool_t fUseYesNoLeaf
Definition: MethodDT.h:117
const Ranking * CreateRanking()
Definition: MethodDT.cxx:567
void ReadWeightsFromXML(void *wghtnode)
Definition: MethodDT.cxx:532
Double_t fNodePurityLimit
Definition: MethodDT.h:118
TString fSepTypeS
Definition: MethodDT.h:111
TString fMinNodeSizeS
Definition: MethodDT.h:114
void GetHelpMessage() const
Definition: MethodDT.cxx:562
std::vector< Double_t > fVariableImportance
Definition: MethodDT.h:130
void AddWeightsXMLTo(void *parent) const
Definition: MethodDT.cxx:524
Double_t PruneTree()
prune the decision tree if requested (good for individual trees that are best grown out,...
Definition: MethodDT.cxx:407
void ReadWeightsFromStream(std::istream &istr)
Definition: MethodDT.cxx:542
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
Bool_t fRandomisedTrees
Definition: MethodDT.h:127
DecisionTree * fTree
Definition: MethodDT.h:108
std::vector< Event * > fEventSample
Definition: MethodDT.h:106
Double_t GetPruneStrength()
Definition: MethodDT.h:91
void DeclareOptions()
Define the options (their key words) that can be set in the option string.
Definition: MethodDT.cxx:213
Bool_t fPruneBeforeBoost
Definition: MethodDT.h:137
void Init(void)
common initialisation with defaults for the DT-Method
Definition: MethodDT.cxx:344
void SetMinNodeSize(Double_t sizeInPercent)
Definition: MethodDT.cxx:321
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
Definition: MethodDT.cxx:246
Int_t fNCuts
Definition: MethodDT.h:116
Double_t fPruneStrength
Definition: MethodDT.h:123
TString fPruneMethodS
Definition: MethodDT.h:125
Int_t fMinNodeEvents
Definition: MethodDT.h:112
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodDT.cxx:257
Ranking for variables in method (implementation)
Definition: Ranking.h:48
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
EAnalysisType
Definition: Types.h:127
Basic string class.
Definition: TString.h:131
create variable transformations