70 fLogger(new
MsgLogger(
"CostComplexityPruneTool") )
104 if( dt == NULL || (
IsAutomatic() && validationSample == NULL) ) {
122 Log() << kDEBUG <<
"Sum of weights in pruning validation sample: " << W <<
Endl;
123 Log() << kDEBUG <<
"Quality of tree prior to any pruning is " << Q/W <<
Endl;
130 catch(
const std::string &error) {
131 Log() << kERROR <<
"Couldn't initialize the tree meta data because of error ("
132 << error <<
")" <<
Endl;
136 Log() << kDEBUG <<
"Automatic cost complexity pruning is " << (
IsAutomatic()?
"on":
"off") <<
"." <<
Endl;
141 catch(
const std::string &error) {
142 Log() << kERROR <<
"Error optimizing pruning sequence ("
143 << error <<
")" <<
Endl;
147 Log() << kDEBUG <<
"Index of pruning sequence to stop at: " <<
fOptimalK <<
Endl;
157 Log() << kINFO <<
"no proper pruning could be calculated. Tree "
158 << dt->
GetTreeID() <<
" will not be pruned. Do not worry if this "
159 <<
" happens for a few trees " <<
Endl;
182 if(
n == NULL )
return;
188 else n->SetNodeR( (s+
b)*
n->GetSeparationIndex() );
190 if(
n->GetLeft() != NULL &&
n->GetRight() != NULL) {
196 n->SetNTerminal(
n->GetLeft()->GetNTerminal() +
197 n->GetRight()->GetNTerminal());
199 n->SetSubTreeR( (
n->GetLeft()->GetSubTreeR() +
200 n->GetRight()->GetSubTreeR()));
202 n->SetAlpha( ((
n->GetNodeR() -
n->GetSubTreeR()) /
203 (
n->GetNTerminal() - 1)));
207 n->SetAlphaMinSubtree( std::min(
n->GetAlpha(), std::min(
n->GetLeft()->GetAlphaMinSubtree(),
208 n->GetRight()->GetAlphaMinSubtree())));
209 n->SetCC(
n->GetAlpha());
212 n->SetNTerminal( 1 );
n->SetTerminal( );
214 else n->SetSubTreeR( (s+
b)*
n->GetSeparationIndex() );
215 n->SetAlpha(std::numeric_limits<double>::infinity( ));
216 n->SetAlphaMinSubtree(std::numeric_limits<double>::infinity( ));
217 n->SetCC(
n->GetAlpha());
262 while(
R->GetNTerminal() > 1) {
265 alpha =
TMath::Max(
R->GetAlphaMinSubtree(), alpha);
267 if(
R->GetAlphaMinSubtree() >=
R->GetAlpha() ) {
268 Log() << kDEBUG <<
"\nCaught trying to prune the root node!" <<
Endl;
289 Log() << kDEBUG <<
"\nCaught trying to prune the root node!" <<
Endl;
315 Log() << kDEBUG <<
"after this pruning step I would have " <<
R->GetNTerminal() <<
" remaining terminal nodes " <<
Endl;
351 Log() << kDEBUG <<
"\n************ Summary for Tree " << dt->
GetTreeID() <<
" *******" <<
Endl
354 Log() << kDEBUG <<
"Pruning strength parameters: [";
359 Log() << kDEBUG <<
"Misclassification rates: [";
Double_t GetSubTreeR() const
void SetAlphaMinSubtree(Double_t g)
Double_t GetAlphaMinSubtree() const
void SetSubTreeR(Double_t r)
virtual DecisionTreeNode * GetLeft() const
Double_t GetNodeR() const
Double_t GetAlpha() const
Int_t GetNTerminal() const
void SetAlpha(Double_t alpha)
virtual DecisionTreeNode * GetParent() const
void SetNTerminal(Int_t n)
virtual DecisionTreeNode * GetRight() const
Implementation of a Decision Tree.
Double_t GetNodePurityLimit() const
void ApplyValidationSample(const EventConstList *validationSample) const
run the validation sample through the (pruned) tree and fill in the nodes the variables NSValidation ...
virtual DecisionTreeNode * GetRoot() const
void PruneNodeInPlace(TMVA::DecisionTreeNode *node)
prune a node temporarily (without actually deleting its descendants which allows testing the pruned t...
Double_t TestPrunedTreeQuality(const DecisionTreeNode *dt=nullptr, Int_t mode=0) const
return the misclassification rate of a pruned tree a "pruned tree" may have set the variable "IsTermi...
Double_t GetSumWeights(const EventConstList *validationSample) const
calculate the normalization factor for a pruning validation sample
ostringstream derivative to redirect and format output
void SetMinType(EMsgType minType)
std::vector< DecisionTreeNode * > PruneSequence
the regularization parameter for pruning
Double_t PruneStrength
quality measure for a pruned subtree T of T_max
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.