ROOT
6.07/01
Reference Guide
ROOT Home Page
Main Page
Tutorials
User's Classes
Namespaces
All Classes
Files
Release Notes
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Properties
Friends
Macros
Groups
Pages
tmva
tmva
inc
TMVA
CostComplexityPruneTool.h
Go to the documentation of this file.
1
/**********************************************************************************
2
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
3
* Package: TMVA *
4
* Class : TMVA::DecisionTree *
5
* Web : http://tmva.sourceforge.net *
6
* *
7
* Description: *
8
* Implementation of a Decision Tree *
9
* *
10
* Authors (alphabetical): *
11
* Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
12
* Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
13
* Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
14
* Doug Schouten <dschoute@sfu.ca> - Simon Fraser U., Canada *
15
* *
16
* Copyright (c) 2005: *
17
* CERN, Switzerland *
18
* U. of Victoria, Canada *
19
* MPI-K Heidelberg, Germany *
20
* *
21
* Redistribution and use in source and binary forms, with or without *
22
* modification, are permitted according to the terms listed in LICENSE *
23
* (http://mva.sourceforge.net/license.txt) *
24
* *
25
**********************************************************************************/
26
27
#ifndef ROOT_TMVA_CostComplexityPruneTool
28
#define ROOT_TMVA_CostComplexityPruneTool
29
30
////////////////////////////////////////////////////////////////////////////////////////////////////////////
31
// CostComplexityPruneTool - a class to prune a decision tree using the Cost Complexity method //
32
// (see "Classification and Regression Trees" by Leo Breiman et al) //
33
// //
34
// Some definitions: //
35
// //
36
// T_max - the initial, usually highly overtrained tree, that is to be pruned back //
37
// R(T) - quality index (Gini, misclassification rate, or other) of a tree T //
38
// ~T - set of terminal nodes in T //
39
// T' - the pruned subtree of T_max that has the best quality index R(T') //
40
// alpha - the prune strength parameter in Cost Complexity pruning (R_alpha(T) = R(T) + alpha*|~T|) //
41
// //
42
// There are two running modes in CostComplexityPruneTool: (i) one may select a prune strength and prune //
43
// the tree T_max until the criterion //
44
// R(T) - R(t) //
45
// alpha < ---------- //
46
// |~T_t| - 1 //
47
// //
48
// is true for all nodes t in T, or (ii) the algorithm finds the sequence of critical points //
49
// alpha_k < alpha_k+1 ... < alpha_K such that T_K = root(T_max) and then selects the optimally-pruned //
50
// subtree, defined to be the subtree with the best quality index for the validation sample. //
51
////////////////////////////////////////////////////////////////////////////////////////////////////////////
52
53
#ifndef ROOT_TMVA_SeparationBase
54
#include "
TMVA/SeparationBase.h
"
55
#endif
56
#ifndef ROOT_TMVA_GiniIndex
57
#include "
TMVA/GiniIndex.h
"
58
#endif
59
#ifndef ROOT_TMVA_DecisionTree
60
#include "
TMVA/DecisionTree.h
"
61
#endif
62
#ifndef ROOT_TMVA_Event
63
#include "
TMVA/Event.h
"
64
#endif
65
#ifndef ROOT_TMVA_IPruneTool
66
#include "
TMVA/IPruneTool.h
"
67
#endif
68
69
namespace
TMVA {
70
71
class
CostComplexityPruneTool
:
public
IPruneTool
{
72
public
:
73
CostComplexityPruneTool
(
SeparationBase
* qualityIndex =
NULL
);
74
virtual
~CostComplexityPruneTool
( );
75
76
// calculate the prune sequence for a given tree
77
virtual
PruningInfo
*
CalculatePruningInfo
(
DecisionTree
* dt,
const
IPruneTool::EventSample
* testEvents =
NULL
,
Bool_t
isAutomatic =
kFALSE
);
78
79
private
:
80
SeparationBase
*
fQualityIndexTool
;
//! the quality index used to calculate R(t), R(T) = sum[t in ~T]{ R(t) }
81
82
std::vector<DecisionTreeNode*>
fPruneSequence
;
//! map of weakest links (i.e., branches to prune) -> pruning index
83
std::vector<Double_t>
fPruneStrengthList
;
//! map of alpha -> pruning index
84
std::vector<Double_t>
fQualityIndexList
;
//! map of R(T) -> pruning index
85
86
Int_t
fOptimalK
;
//! the optimal index of the prune sequence
87
88
private
:
89
// set the meta data used for cost complexity pruning
90
void
InitTreePruningMetaData
(
DecisionTreeNode
*
n
);
91
92
// optimize the pruning sequence
93
void
Optimize
(
DecisionTree
* dt,
Double_t
weights );
94
95
mutable
MsgLogger
*
fLogger
;
//! output stream to save logging information
96
MsgLogger
&
Log
()
const
{
return
*
fLogger
; }
97
98
};
99
}
100
101
102
#endif
DecisionTree.h
TMVA::CostComplexityPruneTool::fOptimalK
Int_t fOptimalK
map of R(T) -> pruning index
Definition:
CostComplexityPruneTool.h:86
TMVA::CostComplexityPruneTool::~CostComplexityPruneTool
virtual ~CostComplexityPruneTool()
the destructor for the cost complexity prunig
Definition:
CostComplexityPruneTool.cxx:66
TMVA::CostComplexityPruneTool::CostComplexityPruneTool
CostComplexityPruneTool(SeparationBase *qualityIndex=NULL)
the constructor for the cost complexity prunig
Definition:
CostComplexityPruneTool.cxx:45
Int_t
int Int_t
Definition:
RtypesCore.h:41
Bool_t
bool Bool_t
Definition:
RtypesCore.h:59
kFALSE
const Bool_t kFALSE
Definition:
Rtypes.h:92
TMVA::CostComplexityPruneTool::fLogger
MsgLogger * fLogger
Definition:
CostComplexityPruneTool.h:95
IPruneTool.h
SeparationBase.h
TMVA::CostComplexityPruneTool::fQualityIndexTool
SeparationBase * fQualityIndexTool
Definition:
CostComplexityPruneTool.h:80
TMVA::IPruneTool::EventSample
std::vector< const Event * > EventSample
Definition:
IPruneTool.h:76
TMVA::PruningInfo
Definition:
IPruneTool.h:43
TMVA::CostComplexityPruneTool::Optimize
void Optimize(DecisionTree *dt, Double_t weights)
after the critical alpha values (at which the corresponding nodes would be pruned away) had been esta...
Definition:
CostComplexityPruneTool.cxx:213
GiniIndex.h
TMVA::CostComplexityPruneTool::Log
MsgLogger & Log() const
output stream to save logging information
Definition:
CostComplexityPruneTool.h:96
TMVA::DecisionTree
Definition:
DecisionTree.h:73
TMVA::SeparationBase
Definition:
SeparationBase.h:88
TMVA::CostComplexityPruneTool
Definition:
CostComplexityPruneTool.h:71
Double_t
double Double_t
Definition:
RtypesCore.h:55
Event.h
TMVA::IPruneTool
Definition:
IPruneTool.h:72
TMVA::CostComplexityPruneTool::fPruneStrengthList
std::vector< Double_t > fPruneStrengthList
map of weakest links (i.e., branches to prune) -> pruning index
Definition:
CostComplexityPruneTool.h:83
TMVA::CostComplexityPruneTool::fQualityIndexList
std::vector< Double_t > fQualityIndexList
map of alpha -> pruning index
Definition:
CostComplexityPruneTool.h:84
TMVA::MsgLogger
Definition:
MsgLogger.h:63
TMVA::CostComplexityPruneTool::fPruneSequence
std::vector< DecisionTreeNode * > fPruneSequence
the quality index used to calculate R(t), R(T) = sum[t in ~T]{ R(t) }
Definition:
CostComplexityPruneTool.h:82
TMVA::CostComplexityPruneTool::InitTreePruningMetaData
void InitTreePruningMetaData(DecisionTreeNode *n)
the optimal index of the prune sequence
Definition:
CostComplexityPruneTool.cxx:159
NULL
#define NULL
Definition:
Rtypes.h:82
n
const Int_t n
Definition:
legend1.C:16
TMVA::DecisionTreeNode
Definition:
DecisionTreeNode.h:120
TMVA::CostComplexityPruneTool::CalculatePruningInfo
virtual PruningInfo * CalculatePruningInfo(DecisionTree *dt, const IPruneTool::EventSample *testEvents=NULL, Bool_t isAutomatic=kFALSE)
Definition:
CostComplexityPruneTool.cxx:73