```
#ifndef ROOT_TMVA_CCPruner
#define ROOT_TMVA_CCPruner
/**********************************************************************************
* Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
* Package: TMVA                                                                  *
* Class  : CCPruner                                                              *
* Web    : http://tmva.sourceforge.net                                           *
*                                                                                *
* Description: Cost Complexity Pruning                                           *
*
* Author: Doug Schouten (dschoute@sfu.ca)
*
*                                                                                *
*      CERN, Switzerland                                                         *
*      MPI-K Heidelberg, Germany                                                 *
*      U. of Texas at Austin, USA                                                *
*                                                                                *
* Redistribution and use in source and binary forms, with or without             *
* modification, are permitted according to the terms listed in LICENSE           *
**********************************************************************************/

////////////////////////////////////////////////////////////////////////////////////////////////////////////
// CCPruner - a helper class to prune a decision tree using the Cost Complexity method                    //
// (see Classification and Regression Trees by Leo Breiman et al)                                         //
//                                                                                                        //
// Some definitions:                                                                                      //
//                                                                                                        //
// T_max - the initial, usually highly overtrained tree, that is to be pruned back                        //
// R(T) - quality index (Gini, misclassification rate, or other) of a tree T                              //
// ~T - set of terminal nodes in T                                                                        //
// T' - the pruned subtree of T_max that has the best quality index R(T')                                 //
// alpha - the prune strength parameter in Cost Complexity pruning (R_alpha(T) = R(T) + alpha * |~T|)     //
//                                                                                                        //
// There are two running modes in CCPruner: (i) one may select a prune strength and prune back            //
// the tree T_max until the criterion                                                                     //
//             R(t) - R(T_t)                                                                              //
//  alpha <    -------------                                                                              //
//              |~T_t| - 1                                                                                //
//                                                                                                        //
// is true for all nodes t in T, or (ii) the algorithm finds the sequence of critical points              //
// alpha_k < alpha_k+1 ... < alpha_K such that T_K = root(T_max) and then selects the optimally-pruned    //
// subtree, defined to be the subtree with the best quality index for the validation sample.              //
////////////////////////////////////////////////////////////////////////////////////////////////////////////

#ifndef ROOT_TMVA_DecisionTree
#include "TMVA/DecisionTree.h"
#endif

/* #ifndef ROOT_TMVA_DecisionTreeNode */
/* #include "TMVA/DecisionTreeNode.h" */
/* #endif */

#ifndef ROOT_TMVA_Event
#include "TMVA/Event.h"
#endif

namespace TMVA {
   class DataSet;
   class DecisionTreeNode;
   class SeparationBase;

   // Helper class that prunes a decision tree with the Cost Complexity method.
   //
   // A pruner is built from the tree to prune, a validation sample (either an
   // event list or a DataSet) and an optional quality index. After Optimize()
   // the accessors below expose the selected prune sequence together with the
   // quality index and prune strength recorded for it.
   //
   // NOTE(review): the class declares a destructor and tracks ownership of
   // fQualityIndex (fOwnQIndex) but declares no copy constructor or copy
   // assignment. Confirm instances are never copied, or disable copying.
   class CCPruner {
   public:
      typedef std::vector<Event*> EventList;

      // Build a pruner for t_max using a list of events as validation sample.
      // qualityIndex may be omitted (NULL).
      // NOTE(review): presumably a default index is then created and owned by
      // the pruner (see fOwnQIndex) - confirm in the implementation file.
      CCPruner( DecisionTree* t_max,
                const EventList* validationSample,
                SeparationBase* qualityIndex = NULL );

      // Same as above, but the validation sample is supplied as a DataSet.
      CCPruner( DecisionTree* t_max,
                const DataSet* validationSample,
                SeparationBase* qualityIndex = NULL );

      ~CCPruner( );

      // Set the pruning strength parameter alpha. The inline definition at the
      // end of this header stores 0 for every alpha <= 0, which includes the
      // default argument of -1.
      // NOTE(review): the previous comment stated "if alpha < 0, the optimal
      // alpha is calculated"; the setter alone does not show that - confirm
      // against Optimize().
      void SetPruneStrength( Float_t alpha = -1.0 );

      // Run the pruning optimisation (defined in the implementation file).
      void Optimize( );

      // return the list of pruning locations to define the optimal subtree T' of T_max
      std::vector<TMVA::DecisionTreeNode*> GetOptimalPruneSequence( ) const;

      // return the quality index from the validation sample for the optimal subtree T',
      // or -1 when fOptimalK does not address an entry of fQualityIndexList
      // (negative, list empty, or index past the end)
      inline Float_t GetOptimalQualityIndex( ) const {
         return ( fOptimalK >= 0 &&
                  static_cast<std::vector<Float_t>::size_type>(fOptimalK) < fQualityIndexList.size()
                  ? fQualityIndexList[fOptimalK] : -1.0 );
      }

      // return the prune strength (=alpha) corresponding to the prune sequence,
      // or -1 when fOptimalK does not address an entry of fPruneStrengthList
      // (negative, list empty, or index past the end)
      inline Float_t GetOptimalPruneStrength( ) const {
         return ( fOptimalK >= 0 &&
                  static_cast<std::vector<Float_t>::size_type>(fOptimalK) < fPruneStrengthList.size()
                  ? fPruneStrengthList[fOptimalK] : -1.0 );
      }

   private:
      Float_t              fAlpha; //! regularization parameter in CC pruning
      const EventList*     fValidationSample; //! the event sample to select the optimally-pruned tree
      const DataSet*       fValidationDataSet; //! the event sample to select the optimally-pruned tree
      SeparationBase*      fQualityIndex; //! the quality index used to calculate R(t), R(T) = sum[t in ~T]{ R(t) }
      Bool_t               fOwnQIndex; //! flag indicates if fQualityIndex is owned by this

      DecisionTree*        fTree; //! (pruned) decision tree

      std::vector<TMVA::DecisionTreeNode*> fPruneSequence; //! map of weakest links (i.e., branches to prune) -> pruning index
      std::vector<Float_t> fPruneStrengthList;  //! map of alpha -> pruning index
      std::vector<Float_t> fQualityIndexList;   //! map of R(T) -> pruning index

      Int_t                fOptimalK;           //! index of the optimal tree in the pruned tree sequence
      Bool_t               fDebug;              //! debug flag
   };
}

// Store the requested prune strength. Any value that is not strictly positive
// (including the declaration's default argument of -1) is stored as zero.
inline void TMVA::CCPruner::SetPruneStrength( Float_t alpha ) {
   if (alpha > 0) {
      fAlpha = alpha;
   } else {
      fAlpha = 0.0;
   }
}

#endif

```
CCPruner.h:1
CCPruner.h:2
CCPruner.h:3
CCPruner.h:4
CCPruner.h:5
CCPruner.h:6
CCPruner.h:7
CCPruner.h:8
CCPruner.h:9
CCPruner.h:10
CCPruner.h:11
CCPruner.h:12
CCPruner.h:13
CCPruner.h:14
CCPruner.h:15
CCPruner.h:16
CCPruner.h:17
CCPruner.h:18
CCPruner.h:19
CCPruner.h:20
CCPruner.h:21
CCPruner.h:22
CCPruner.h:23
CCPruner.h:24
CCPruner.h:25
CCPruner.h:26
CCPruner.h:27
CCPruner.h:28
CCPruner.h:29
CCPruner.h:30
CCPruner.h:31
CCPruner.h:32
CCPruner.h:33
CCPruner.h:34
CCPruner.h:35
CCPruner.h:36
CCPruner.h:37
CCPruner.h:38
CCPruner.h:39
CCPruner.h:40
CCPruner.h:41
CCPruner.h:42
CCPruner.h:43
CCPruner.h:44
CCPruner.h:45
CCPruner.h:46
CCPruner.h:47
CCPruner.h:48
CCPruner.h:49
CCPruner.h:50
CCPruner.h:51
CCPruner.h:52
CCPruner.h:53
CCPruner.h:54
CCPruner.h:55
CCPruner.h:56
CCPruner.h:57
CCPruner.h:58
CCPruner.h:59
CCPruner.h:60
CCPruner.h:61
CCPruner.h:62
CCPruner.h:63
CCPruner.h:64
CCPruner.h:65
CCPruner.h:66
CCPruner.h:67
CCPruner.h:68
CCPruner.h:69
CCPruner.h:70
CCPruner.h:71
CCPruner.h:72
CCPruner.h:73
CCPruner.h:74
CCPruner.h:75
CCPruner.h:76
CCPruner.h:77
CCPruner.h:78
CCPruner.h:79
CCPruner.h:80
CCPruner.h:81
CCPruner.h:82
CCPruner.h:83
CCPruner.h:84
CCPruner.h:85
CCPruner.h:86
CCPruner.h:87
CCPruner.h:88
CCPruner.h:89
CCPruner.h:90
CCPruner.h:91
CCPruner.h:92
CCPruner.h:93
CCPruner.h:94
CCPruner.h:95
CCPruner.h:96
CCPruner.h:97
CCPruner.h:98
CCPruner.h:99
CCPruner.h:100
CCPruner.h:101
CCPruner.h:102
CCPruner.h:103
CCPruner.h:104
CCPruner.h:105
CCPruner.h:106
CCPruner.h:107
CCPruner.h:108
CCPruner.h:109
CCPruner.h:110
CCPruner.h:111
CCPruner.h:112
CCPruner.h:113
CCPruner.h:114
CCPruner.h:115
CCPruner.h:116
CCPruner.h:117
CCPruner.h:118
CCPruner.h:119
CCPruner.h:120