ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
ExpectedErrorPruneTool.h
Go to the documentation of this file.
1 /**********************************************************************************
2  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
3  * Package: TMVA *
4  * Class : TMVA::DecisionTree *
5  * Web : http://tmva.sourceforge.net *
6  * *
7  * Description: *
8  * Implementation of a Decision Tree *
9  * *
10  * Authors (alphabetical): *
11  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
12  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
13  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
14  * Doug Schouten <dschoute@sfu.ca> - Simon Fraser U., Canada *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * U. of Victoria, Canada *
19  * MPI-K Heidelberg, Germany *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://mva.sourceforge.net/license.txt) *
24  * *
25  **********************************************************************************/
26 
27 #ifndef ROOT_TMVA_ExpectedErrorPruneTool
28 #define ROOT_TMVA_ExpectedErrorPruneTool
29 
30 /////////////////////////////////////////////////////////////////////////////////////////////////////////////
31 // ExpectedErrorPruneTool - a helper class to prune a decision tree using the expected error (C4.5) method //
32 // //
33 // Uses an upper limit on the error made by the classification done by each node. If the S/S+B of the node //
34 // is f, then according to the training sample, the error rate (fraction of misclassified events by this //
35 // node) is (1-f). Now f has a statistical error according to the binomial distribution hence the error on //
36 // f can be estimated (same error as the binomial error for efficency calculations //
37 // ( sigma = sqrt(eff(1-eff)/nEvts ) ) //
38 // //
39 // This tool prunes branches from a tree if the expected error of a node is less than that of the sum of //
40 // the error in its descendants. //
41 // //
42 /////////////////////////////////////////////////////////////////////////////////////////////////////////////
43 
44 #include <vector>
45 #include <map>
46 
47 #ifndef ROOT_TMath
48 #include "TMath.h"
49 #endif
50 
51 #ifndef ROOT_TMVA_IPruneTool
52 #include "TMVA/IPruneTool.h"
53 #endif
54 
55 namespace TMVA {
56 
57  class MsgLogger;
58 
60  public:
62  virtual ~ExpectedErrorPruneTool( );
63 
64  // returns the PruningInfo object for a given tree and test sample
66  Bool_t isAutomatic = kFALSE );
67 
68  public:
69  // set the increment dalpha with which to scan for the optimal prune strength
70  inline void SetPruneStrengthIncrement( Double_t dalpha ) { fDeltaPruneStrength = dalpha; }
71 
72  private:
73  void FindListOfNodes( DecisionTreeNode* node );
74  Double_t GetNodeError( DecisionTreeNode* node ) const;
76  Int_t CountNodes( DecisionTreeNode* node, Int_t icount = 0 );
77 
78  Double_t fDeltaPruneStrength; //! the stepsize for optimizing the pruning strength parameter
79  Double_t fNodePurityLimit; //! the purity limit for labelling a terminal node as signal
80  std::vector<DecisionTreeNode*> fPruneSequence; //! the (optimal) prune sequence
81  // std::multimap<const Double_t, Double_t> fQualityMap; //! map of tree quality <=> prune strength
82  mutable MsgLogger* fLogger; // message logger
83  MsgLogger& Log() const { return *fLogger; }
84  };
85 
89  Int_t counter = icount + 1; // count this node
90  if(!(node->IsTerminal()) && l != NULL && r != NULL) {
91  counter = CountNodes(l,counter);
92  counter = CountNodes(r,counter);
93  }
94  return counter;
95  }
96 }
97 
98 #endif
99 
virtual DecisionTreeNode * GetRight() const
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
std::vector< DecisionTreeNode * > fPruneSequence
the purity limit for labelling a terminal node as signal
virtual DecisionTreeNode * GetLeft() const
std::vector< const Event * > EventSample
Definition: IPruneTool.h:76
virtual PruningInfo * CalculatePruningInfo(DecisionTree *dt, const IPruneTool::EventSample *testEvents=NULL, Bool_t isAutomatic=kFALSE)
ROOT::R::TRInterface & r
Definition: Object.C:4
void FindListOfNodes(DecisionTreeNode *node)
recursive pruning of nodes using the Expected Error Pruning (EEP)
TLine * l
Definition: textangle.C:4
Bool_t IsTerminal() const
Double_t fNodePurityLimit
the stepsize for optimizing the pruning strength parameter
double Double_t
Definition: RtypesCore.h:55
Int_t CountNodes(DecisionTreeNode *node, Int_t icount=0)
Double_t GetNodeError(DecisionTreeNode *node) const
Calculate an UPPER limit on the error made by the classification done by this node.
#define NULL
Definition: Rtypes.h:82
MsgLogger * fLogger
the (optimal) prune sequence
void SetPruneStrengthIncrement(Double_t dalpha)
Double_t GetSubTreeError(DecisionTreeNode *node) const
calculate the expected statistical error on the subtree below "node" which is used in the expected er...