library: libTMVA
#include "DecisionTree.h"

TMVA::DecisionTree



class TMVA::DecisionTree: public TMVA::BinaryTree


Function Members (Methods)

public:
virtual ~DecisionTree()
TMVA::BinaryTree TMVA::BinaryTree::BinaryTree()
Int_t BuildTree(vector<TMVA::Event*>& eventSample, TMVA::DecisionTreeNode* node = NULL)
Double_t CheckEvent(const TMVA::Event&, Bool_t UseYesNoLeaf = kFALSE)
static TClass* Class()
void ClearTree()
UInt_t CountLeafNodes(TMVA::DecisionTreeNode* n = NULL)
UInt_t TMVA::BinaryTree::CountNodes(TMVA::Node* n = NULL)
TMVA::DecisionTree DecisionTree()
TMVA::DecisionTree DecisionTree(TMVA::DecisionTreeNode* n)
TMVA::DecisionTree DecisionTree(const TMVA::DecisionTree& d)
TMVA::DecisionTree DecisionTree(TMVA::SeparationBase* sepType, Int_t minSize, Int_t nCuts, TMVA::SeparationBase* qtype = NULL)
void DescendTree(TMVA::DecisionTreeNode* n = NULL)
void DrawNode(TH2D* h, TMVA::DecisionTreeNode* n, Double_t y, Double_t x, Double_t scale)
TH2D* DrawTree(TString hname)
void FillEvent(TMVA::Event& event, TMVA::DecisionTreeNode* node)
void FillLinkStrengthMap(TMVA::DecisionTreeNode* n = NULL)
void FillQualityGainMap(TMVA::DecisionTreeNode* n = NULL)
void FillQualityMap(TMVA::DecisionTreeNode* n = NULL)
void FillTree(vector<TMVA::Event*>& eventSample)
TMVA::DecisionTreeNode* FindCCPruneCandidate()
Double_t GetCostComplexity(Double_t alpha)
Double_t GetCostComplexityIfNextPruneStep(Double_t alpha)
UInt_t GetDepth()
TMVA::DecisionTreeNode* GetLeftDaughter(TMVA::DecisionTreeNode* n)
multimap<Double_t,TMVA::DecisionTreeNode*>& GetLinkStrengthMap()
UInt_t TMVA::BinaryTree::GetNNodes() const
TMVA::DecisionTreeNode* GetNode(ULong_t sequence, UInt_t depth)
multimap<Double_t,TMVA::DecisionTreeNode*>& GetQualityGainMap()
multimap<Double_t,TMVA::DecisionTreeNode*>& GetQualityMap()
TMVA::DecisionTreeNode* GetRightDaughter(TMVA::DecisionTreeNode* n)
TMVA::Node* TMVA::BinaryTree::GetRoot() const
vector<Double_t> GetVariableImportance()
Double_t GetVariableImportance(Int_t ivar)
TMVA::DecisionTreeNode* GetWeakestLink()
virtual TClass* IsA() const
Double_t MisClassificationCostOfNode(TMVA::DecisionTreeNode* n)
Double_t MisClassificationCostOfSubTree(TMVA::DecisionTreeNode* n = NULL)
void TMVA::BinaryTree::Print(ostream& os) const
void PruneNode(TMVA::DecisionTreeNode* node)
void PruneTree()
void PruneTreeCC()
void PruneTreeEEP(TMVA::DecisionTreeNode* node)
void PruneTreeMCC()
void SetParentTreeInNodes(TMVA::DecisionTreeNode* n = NULL)
void SetPruneMethod(TMVA::DecisionTree::EPruneMethod m = kExpectedErrorPruning)
void SetPruneStrength(Double_t p)
void TMVA::BinaryTree::SetRoot(TMVA::Node* r)
virtual void ShowMembers(TMemberInspector& insp, char* parent)
virtual void Streamer(TBuffer& b)
void StreamerNVirtual(TBuffer& b)
Double_t TrainNode(vector<TMVA::Event*>& eventSample, TMVA::DecisionTreeNode* node)
protected:
void TMVA::BinaryTree::DeleteNode(TMVA::Node*)
private:
void FindMinAndMax(vector<TMVA::Event*>& eventSample, vector<Double_t>& min, vector<Double_t>& max)
Double_t GetNodeError(TMVA::DecisionTreeNode* node)
Double_t GetSubTreeError(TMVA::DecisionTreeNode* node)
Double_t SamplePurity(vector<TMVA::Event*> eventSample)
void SetCutPoints(vector<Double_t>& cut_points, Double_t xmin, Double_t xmax, Int_t num_gridpoints)

Data Members

public:
enum EPruneMethod { kExpectedErrorPruning, kCostComplexityPruning, kMCC };
protected:
Int_t TMVA::BinaryTree::fNNodes   // total number of nodes in the tree (counted)
TMVA::MsgLogger TMVA::BinaryTree::fLogger   // message logger
private:
Int_t fNvars   // number of variables used to separate S and B
Int_t fNCuts   // number of grid points in variable cut scans
TMVA::SeparationBase* fSepType   // the separation criterion
Double_t fMinSize   // min number of events in a node
Double_t fMinSepGain   // min separation gain required to perform node splitting
Bool_t fUseSearchTree   // cut scan done with binary search trees or a simple event loop
Double_t fPruneStrength   // a parameter to set the "amount" of pruning; needs to be adjusted
TMVA::DecisionTree::EPruneMethod fPruneMethod   // method used for pruning
vector<Double_t> fVariableImportance   // the relative importance of the different variables
UInt_t fDepth   // maximal depth reached in the tree
TMVA::SeparationBase* fQualityIndex   // separation/quality criterion for CC-pruning
multimap<Double_t,TMVA::DecisionTreeNode*> fQualityGainMap   // the quality gain of pre-leaf nodes
multimap<Double_t,TMVA::DecisionTreeNode*> fQualityMap   // the quality of leaf nodes
multimap<Double_t,TMVA::DecisionTreeNode*> fLinkStrengthMap   // prune strengths at which the subtree below the node would be pruned
static const Int_t fgDebugLevel   // debug level determining some printout/control plots etc.

Class Description


 Implementation of a Decision Tree

 In a decision tree successive decision nodes are used to categorize the
 events out of the sample as either signal or background. Each node
 uses only a single discriminating variable to decide if the event is
 signal-like ("goes right") or background-like ("goes left"). This
 forms a tree-like structure with "baskets" at the end (leaf nodes),
 and an event is classified as either signal or background according to
 whether the basket where it ends up has been classified as signal or
 background during the training. Training of a decision tree is the
 process of defining the "cut criteria" for each node. The training
 starts with the root node. Here one takes the full training event
 sample and selects the variable and corresponding cut value that give
 the best separation between signal and background at this stage. Using
 this cut criterion, the sample is then divided into two subsamples, a
 signal-like (right) and a background-like (left) sample. Two new nodes
 are then created for each of the two subsamples, and they are
 constructed using the same mechanism as described for the root
 node. The division is stopped once a certain node has reached either a
 minimum number of events or a minimum or maximum signal purity. These
 leaf nodes are then called "signal" or "background" depending on whether
 they contain more signal or background events from the training sample.
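
 The typical calling sequence follows from this: construct the tree, build
 it from a training sample, optionally prune it, then classify events. The
 following is a minimal sketch, assuming a prepared vector of TMVA::Event*;
 parameter values are illustrative and the include paths and the GiniIndex
 separation class are taken on the pattern of the TMVA sources (in practice
 these calls are normally driven internally, e.g. by the boosted decision
 tree method):

   #include "DecisionTree.h"
   #include "GiniIndex.h"
   #include "Event.h"
   #include <vector>

   void BuildAndClassify(std::vector<TMVA::Event*>& trainingSample,
                         const TMVA::Event& testEvent)
   {
      // GiniIndex as separation criterion, at least 20 events per node,
      // 20 grid points in the cut scan (both values illustrative);
      // ownership of the separation object is left open in this sketch
      TMVA::DecisionTree tree(new TMVA::GiniIndex(), 20, 20);

      tree.BuildTree(trainingSample);     // recursive splitting from the root
      tree.SetPruneMethod(TMVA::DecisionTree::kExpectedErrorPruning);
      tree.SetPruneStrength(4.5);         // illustrative value
      tree.PruneTree();                   // remove statistically weak nodes

      // result: NodeType (signal/background) of the leaf the event ends up in
      Double_t result = tree.CheckEvent(testEvent);
      (void)result;
   }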
_______________________________________________________________________
DecisionTree( void )
 default constructor using the GiniIndex as separation criterion,
 no restrictions on the minimum number of events in a leaf node or the
 separation gain in the node splitting
DecisionTree( DecisionTreeNode* n )
 default constructor using the GiniIndex as separation criterion,
 no restrictions on the minimum number of events in a leaf node or the
 separation gain in the node splitting
DecisionTree( TMVA::SeparationBase *sepType, Int_t minSize, Int_t nCuts, TMVA::SeparationBase *qtype)
 constructor specifying the separation type, the minimum number of
 events in a node that is still subjected to further splitting, and the
 number of bins in the grid used in applying the cut for the node
 splitting.
DecisionTree( const DecisionTree &d)
 copy constructor that creates a true copy, i.e. a completely independent
 tree; the node copy will recursively copy all the nodes
~DecisionTree( void )
 destructor
 note: fQualityIndex is not deleted here ("delete fQualityIndex;" is left
 out on purpose), because so far the copy constructor only copies the
 pointer when copying a decision tree, so deleting it here would leave
 the copies with a dangling pointer
void SetParentTreeInNodes( DecisionTreeNode *n)
 descend the tree to find all its leaf nodes, filling in the maximal
 depth reached in the tree at the same time
Int_t BuildTree( vector<TMVA::Event*> & eventSample, TMVA::DecisionTreeNode *node )
 building the decision tree by recursively calling the splitting of 
 one (root-) node into two daughter nodes (returns the number of nodes)
void FillTree( vector<TMVA::Event*> & eventSample)
 fill the existing decision tree structure by passing the events
 in from the top node and seeing where they end up
void FillEvent( TMVA::Event & event, TMVA::DecisionTreeNode *node )
 fill the existing decision tree structure by passing the event
 in from the top node and seeing where it ends up
void ClearTree()
 clear the tree nodes (their S/N, Nevents etc), just keep the structure of the tree
void PruneTree()
 prune the decision tree (get rid of internal nodes) to avoid overtraining.
 Several different pruning methods can be applied, as selected by the
 variable "fPruneMethod". Currently, however, only the Expected Error
 Pruning is implemented.
 
void PruneTreeEEP(DecisionTreeNode *node)
 recursive pruning of nodes using the Expected Error Pruning (EEP):
 if it is an internal node, then prune
void PruneTreeCC()
 pruning of nodes using the Cost Complexity criterion. The pruning is
 performed until a minimum of the cost complexity CC(alpha) is reached:
 CC(alpha) = alpha*NLeafs + sum_over_leafs[ N*Quality(leaf) ]
 where Quality(leaf) is given by 1-purity (for the Misclassification
 Error), by purity*(1-purity) for the Gini index, etc. Typically the
 Misclassification Error is used for guiding the pruning.
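
 As an illustration only (not the class's internal code), CC(alpha) can be
 computed from per-leaf event counts and qualities; the two vectors below
 are hypothetical stand-ins for what the class keeps in fQualityMap:

   #include <cstddef>
   #include <vector>

   // CC(alpha) = alpha*NLeafs + sum_over_leafs[ N * Quality(leaf) ]
   double CostComplexity(double alpha,
                         const std::vector<double>& leafWeights,    // N per leaf
                         const std::vector<double>& leafQualities)  // Quality(leaf)
   {
      double cc = alpha * leafWeights.size();            // alpha * NLeafs
      for (std::size_t i = 0; i < leafWeights.size(); ++i)
         cc += leafWeights[i] * leafQualities[i];        // N * Quality(leaf)
      return cc;
   }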
void PruneTreeMCC()
 similar to the Cost Complexity pruning, only here the "prune strength"
 (= alpha, the regularisation parameter in the Cost Complexity) at which
 the respective subtree below a node would be pruned is calculated
 immediately; pruning then continues until all nodes have such a value
 larger than the specified prune strength
TMVA::DecisionTreeNode* GetWeakestLink()
 get the weakest link for the pruning step
void FillLinkStrengthMap(TMVA::DecisionTreeNode *n)
 loop over all non-leaf nodes of the tree and calculate for each
 of these nodes the prune strength ("alpha") at which this node
 would win against its subtree (hence its subtree would be pruned);
 this is given by
    alpha < ( R(t) - R(T_t) ) / ( |T_t| - 1 )
 where R(t) is the misclassification cost of the node t, R(T_t) the
 misclassification cost of the subtree below t, and |T_t| the number
 of nodes in the subtree T_t
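
 Expressed as code, the critical alpha for a single node is a one-liner;
 this sketch mirrors MisClassificationCostOfNode and
 MisClassificationCostOfSubTree, with the inputs passed as plain numbers
 (an assumption, the class reads them from the tree):

   // prune strength ("alpha") at which node t wins against its subtree:
   // below this value the subtree is kept, above it the subtree is pruned
   // and t becomes a leaf
   double LinkStrength(double costOfNode,      // R(t)
                       double costOfSubTree,   // R(T_t)
                       int    nSubTreeNodes)   // |T_t|
   {
      return (costOfNode - costOfSubTree) / (nSubTreeNodes - 1);
   }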

Double_t MisClassificationCostOfNode(TMVA::DecisionTreeNode *n)
 get the misclassification cost of the node n
Double_t MisClassificationCostOfSubTree(TMVA::DecisionTreeNode *n)
 get the misclassification cost of the subtree below n
UInt_t CountLeafNodes(TMVA::DecisionTreeNode *n)
 return the number of terminal nodes in the sub-tree below Node n
Double_t GetCostComplexity(Double_t alpha)
 returns the cost complexity criterion for the decision tree;
 see L. Breiman, J.H. Friedman, R.A. Olshen, C.J. Stone, "Classification
 and Regression Trees", Wadsworth International Group (1984),
 Chapman & Hall/CRC (1984)
Double_t GetCostComplexityIfNextPruneStep(Double_t alpha)
 returns the cost complexity criterion the decision tree would have
 after the next prune step;
 see L. Breiman, J.H. Friedman, R.A. Olshen, C.J. Stone, "Classification
 and Regression Trees", Wadsworth International Group (1984),
 Chapman & Hall/CRC (1984)
void FillQualityGainMap(DecisionTreeNode* n )
 traverse the whole tree and fill the map of QualityGain - Node
 for pre-leaf nodes, deciding which node is the next prune candidate
void FillQualityMap(DecisionTreeNode* n )
 traverse the whole tree and find the leaf nodes, and then fill the Quality Criterion
 for the leaf nodes (Used in the Pruning)
void DescendTree( DecisionTreeNode *n)
 descend a tree to find all its leaf nodes
TMVA::DecisionTreeNode* FindCCPruneCandidate()
 get the pruning candidate node as the one pre-leaf node that gives
 the smallest quality gain
void PruneNode(DecisionTreeNode *node)
 prune away the subtree below the node 
Double_t GetNodeError(DecisionTreeNode *node)
 calculate an UPPER limit on the error made by the classification done
 by this node. If the S/(S+B) of the node is f, then according to the
 training sample, the error rate (fraction of events misclassified by
 this node) is (1-f). Now f has a statistical error according to the
 binomial distribution, hence the error on f can be estimated (it is the
 same error as the binomial error for efficiency calculations:
 sigma = sqrt( eff*(1-eff)/nEvts ) )
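
 A numerical sketch of this estimate, assuming the upper limit is formed
 as (1-f) plus one binomial standard deviation on f; the class's actual
 combination (e.g. how fPruneStrength enters) may differ:

   #include <cmath>

   double NodeErrorUpperLimit(double f, double nEvts)    // f = S/(S+B)
   {
      double sigma = std::sqrt(f * (1.0 - f) / nEvts);   // binomial error on f
      return (1.0 - f) + sigma;                          // upper limit on error
   }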
Double_t GetSubTreeError(DecisionTreeNode *node)
 calculate the expected statistical error on the subtree below "node"
 which is used in the expected error pruning
TMVA::DecisionTreeNode* GetLeftDaughter( DecisionTreeNode *n)
 get the left daughter node of the current node "n"
TMVA::DecisionTreeNode* GetRightDaughter( DecisionTreeNode *n)
 get the right daughter node of the current node "n"
TMVA::DecisionTreeNode* GetNode(ULong_t sequence, UInt_t depth)
 retrieve a node from the tree. Its position (up to a maximal tree depth
 of 64) is coded as a sequence of left-right moves starting from the
 root, stored as 0-1 bit patterns in the "long integer" (i.e. 0: left;
 1: right)
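
 The implied walk down the tree could look like the following sketch,
 using only accessors listed above; the bit order (least significant bit
 = first move from the root) is an assumption:

   #include "DecisionTree.h"

   TMVA::DecisionTreeNode* WalkSequence(TMVA::DecisionTree& tree,
                                        ULong_t sequence, UInt_t depth)
   {
      TMVA::DecisionTreeNode* current =
         static_cast<TMVA::DecisionTreeNode*>(tree.GetRoot());
      for (UInt_t i = 0; i < depth && current != 0; ++i) {
         current = (sequence & (1UL << i))          // bit i: move at depth i
                      ? tree.GetRightDaughter(current)   // 1: go right
                      : tree.GetLeftDaughter(current);   // 0: go left
      }
      return current;
   }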
void FindMinAndMax(vector<TMVA::Event*> & eventSample, vector<Double_t> & xmin, vector<Double_t> & xmax)
 helper function which finds the minimum and maximum values
 of the event variables in the current event sample
void SetCutPoints(vector<Double_t> & cut_points, Double_t xmin, Double_t xmax, Int_t num_gridpoints)
 helper function which calculates the grid points used for
 the cut scan
Double_t TrainNode(vector<TMVA::Event*> & eventSample, TMVA::DecisionTreeNode *node)
 decide how to split a node. At each node, ONE of the variables is
 chosen, namely the one which gives the best separation between signal
 and background on the sample which enters the node.
 In order to do this, for each variable a scan of the different cut
 values on a grid (of fNCuts grid points) is performed and the resulting
 separation gains are compared. This cut scan uses either a binary search
 tree or a simple loop over the events, depending on the number of events
 in the sample
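
 The simple-loop variant of this scan is sketched below, with a Gini-based
 gain standing in for the configurable SeparationBase criterion (fSepType);
 all names and the unweighted event counting are assumptions made for
 illustration:

   #include <cstddef>
   #include <vector>

   // event-weighted Gini impurity: n * p * (1-p) = s*b/(s+b)
   double Gini(double s, double b)
   { double n = s + b; return n > 0 ? s * b / n : 0; }

   // separation gain of a cut: impurity before minus impurity of the
   // two subsamples after the split
   double SeparationGain(double nSelS, double nSelB, double nTotS, double nTotB)
   {
      return Gini(nTotS, nTotB)
           - Gini(nSelS, nSelB)                        // events passing the cut
           - Gini(nTotS - nSelS, nTotB - nSelB);       // events failing the cut
   }

   // scan all grid points for one variable; return the best gain and cut
   double ScanVariable(const std::vector<double>& values,   // variable value per event
                       const std::vector<bool>&   isSignal, // event type per event
                       const std::vector<double>& cutPoints,
                       double& bestCut)
   {
      double nTotS = 0, nTotB = 0;
      for (std::size_t i = 0; i < values.size(); ++i)
         (isSignal[i] ? nTotS : nTotB) += 1.0;

      double bestGain = -1.0;
      for (std::size_t c = 0; c < cutPoints.size(); ++c) {
         double nSelS = 0, nSelB = 0;
         for (std::size_t i = 0; i < values.size(); ++i)
            if (values[i] > cutPoints[c]) (isSignal[i] ? nSelS : nSelB) += 1.0;
         double gain = SeparationGain(nSelS, nSelB, nTotS, nTotB);
         if (gain > bestGain) { bestGain = gain; bestCut = cutPoints[c]; }
      }
      return bestGain;
   }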
Double_t CheckEvent(const TMVA::Event & e, Bool_t UseYesNoLeaf)
 the event e is put into the decision tree (starting at the root node)
 and the output is the NodeType (signal or background) of the final node
 (basket) in which the given event ends up, i.e. the result of the
 classification of the event for this decision tree
Double_t SamplePurity(vector<TMVA::Event*> eventSample)
calculates the purity S/(S+B) of a given event sample
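 A sketch of such a weighted purity, assuming the TMVA::Event accessors
 IsSignal() and GetWeight():

   #include <cstddef>
   #include <vector>
   #include "Event.h"

   Double_t Purity(const std::vector<TMVA::Event*>& sample)
   {
      Double_t s = 0, b = 0;   // summed signal and background weights
      for (std::size_t i = 0; i < sample.size(); ++i)
         (sample[i]->IsSignal() ? s : b) += sample[i]->GetWeight();
      return (s + b > 0) ? s / (s + b) : 0;
   }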
vector< Double_t > GetVariableImportance()
return the relative variable importance, normalized such that the
importances of all variables together sum to 1. The importance is
evaluated as the total separation gain that this variable contributed in
the decision tree (weighted by the number of events)
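
 A usage sketch printing these importances after BuildTree() (the output
 format is illustrative):

   #include <cstddef>
   #include <cstdio>
   #include <vector>
   #include "DecisionTree.h"

   void PrintImportance(TMVA::DecisionTree& tree)
   {
      std::vector<Double_t> imp = tree.GetVariableImportance();
      for (std::size_t ivar = 0; ivar < imp.size(); ++ivar)        // sums to 1
         std::printf("variable %u : %.3f\n", (unsigned)ivar, imp[ivar]);
   }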
Double_t GetVariableImportance(Int_t ivar)
 returns the relative importance of variable ivar
TH2D* DrawTree(TString hname)
 returns a pointer to a 2D histogram with the tree drawing in it
void DrawNode( TH2D* h, DecisionTreeNode *n, Double_t y, Double_t x, Double_t scale)
 recursively puts entries in the histogram for the node and its daughters

void SetPruneMethod( EPruneMethod m = kExpectedErrorPruning )
{ fPruneMethod = m; }
void SetPruneStrength( Double_t p )
{ fPruneStrength = p; }
UInt_t GetDepth()
{return fDepth;}
multimap<Double_t,TMVA::DecisionTreeNode* >& GetQualityGainMap()
{ return fQualityGainMap; }
multimap<Double_t,TMVA::DecisionTreeNode* >& GetQualityMap()
{ return fQualityMap; }
multimap<Double_t,TMVA::DecisionTreeNode* >& GetLinkStrengthMap()
{ return fLinkStrengthMap; }

Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
Last update: root/tmva $Id: DecisionTree.cxx,v 1.11 2006/11/20 15:35:28 brun Exp $
Copyright (c) 2005

