TMVA::MethodRuleFit - source file

// @(#)root/tmva $Id$
// Author: Fredrik Tegenfeldt

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : MethodRuleFit                                                         *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Friedman's RuleFit method                                                 * 
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         * 
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 * 
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 *                                                                                *
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodRuleFit
#define ROOT_TMVA_MethodRuleFit

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodRuleFit                                                        //
//                                                                      //
// J Friedman's RuleFit method                                          //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#ifndef ROOT_TMVA_MethodBase
#include "TMVA/MethodBase.h"
#endif
#ifndef ROOT_TMatrixDfwd
#include "TMatrixDfwd.h"
#endif
#ifndef ROOT_TVectorD
#include "TVectorD.h"
#endif
#ifndef ROOT_TMVA_DecisionTree
#include "TMVA/DecisionTree.h"
#endif
#ifndef ROOT_TMVA_RuleFit
#include "TMVA/RuleFit.h"
#endif

namespace TMVA {

   class SeparationBase;

   class MethodRuleFit : public MethodBase {

   public:

      MethodRuleFit( const TString& jobName,
                     const TString& methodTitle, 
                     DataSetInfo& theData,
                     const TString& theOption = "",
                     TDirectory* theTargetDir = 0 );

      MethodRuleFit( DataSetInfo& theData,
                     const TString& theWeightFile,
                     TDirectory* theTargetDir = NULL );

      virtual ~MethodRuleFit( void );

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );

      // training method
      void Train( void );

      using MethodBase::ReadWeightsFromStream;

      // write weights to file
      void AddWeightsXMLTo     ( void* parent ) const;

      // read weights from file
      void ReadWeightsFromStream( std::istream& istr );
      void ReadWeightsFromXML   ( void* wghtnode );

      // calculate the MVA value
      Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );

      // write method specific histos to target file
      void WriteMonitoringHistosToFile( void ) const;

      // ranking of input variables
      const Ranking* CreateRanking();

      Bool_t                                   UseBoost()           const   { return fUseBoost; }

      // accessors
      RuleFit*                                 GetRuleFitPtr()              { return &fRuleFit; }
      const RuleFit*                           GetRuleFitConstPtr() const   { return &fRuleFit; }
      TDirectory*                              GetMethodBaseDir()   const   { return BaseDir(); }
      const std::vector<TMVA::Event*>&         GetTrainingEvents()  const   { return fEventSample; }
      const std::vector<TMVA::DecisionTree*>&  GetForest()          const   { return fForest; }
      Int_t                                    GetNTrees()          const   { return fNTrees; }
      Double_t                                 GetTreeEveFrac()     const   { return fTreeEveFrac; }
      const SeparationBase*                    GetSeparationBaseConst() const { return fSepType; }
      SeparationBase*                          GetSeparationBase()  const   { return fSepType; }
      TMVA::DecisionTree::EPruneMethod         GetPruneMethod()     const   { return fPruneMethod; }
      Double_t                                 GetPruneStrength()   const   { return fPruneStrength; }
      Double_t                                 GetMinFracNEve()     const   { return fMinFracNEve; }
      Double_t                                 GetMaxFracNEve()     const   { return fMaxFracNEve; }
      Int_t                                    GetNCuts()           const   { return fNCuts; }
      //
      Int_t                                    GetGDNPathSteps()    const   { return fGDNPathSteps; }
      Double_t                                 GetGDPathStep()      const   { return fGDPathStep; }
      Double_t                                 GetGDErrScale()      const   { return fGDErrScale; }
      Double_t                                 GetGDPathEveFrac()   const   { return fGDPathEveFrac; }
      Double_t                                 GetGDValidEveFrac()  const   { return fGDValidEveFrac; }
      //
      Double_t                                 GetLinQuantile()     const   { return fLinQuantile; }

      const TString                            GetRFWorkDir()       const   { return fRFWorkDir; }
      Int_t                                    GetRFNrules()        const   { return fRFNrules; }
      Int_t                                    GetRFNendnodes()     const   { return fRFNendnodes; }

   protected:

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      void MakeClassRuleCuts( std::ostream& ) const;

      void MakeClassLinear( std::ostream& ) const;

      // get help message text
      void GetHelpMessage() const;

      // initialize rulefit
      void Init( void );

      // copy all training events into a stl::vector
      void InitEventSample( void );

      // initialize monitor ntuple
      void InitMonitorNtuple();

      void TrainTMVARuleFit();
      void TrainJFRuleFit();

   private:

      // check variable range and set var to lower or upper if out of range
      template<typename T>
      inline Bool_t VerifyRange( MsgLogger& mlog, const char *varstr, T& var, const T& vmin, const T& vmax );

      template<typename T>
      inline Bool_t VerifyRange( MsgLogger& mlog, const char *varstr, T& var, const T& vmin, const T& vmax, const T& vdef );

      template<typename T>
      inline Int_t VerifyRange( const T& var, const T& vmin, const T& vmax );

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();

      RuleFit                      fRuleFit;        // RuleFit instance
      std::vector<TMVA::Event *>   fEventSample;    // the complete training sample
      Double_t                     fSignalFraction; // scalefactor for bkg events to modify initial s/b fraction in training data

      // ntuple
      TTree                       *fMonitorNtuple;  // pointer to monitor rule ntuple
      Double_t                     fNTImportance;   // ntuple: rule importance
      Double_t                     fNTCoefficient;  // ntuple: rule coefficient
      Double_t                     fNTSupport;      // ntuple: rule support
      Int_t                        fNTNcuts;        // ntuple: rule number of cuts
      Int_t                        fNTNvars;        // ntuple: rule number of vars
      Double_t                     fNTPtag;         // ntuple: rule P(tag)
      Double_t                     fNTPss;          // ntuple: rule P(tag s, true s)
      Double_t                     fNTPsb;          // ntuple: rule P(tag s, true b)
      Double_t                     fNTPbs;          // ntuple: rule P(tag b, true s)
      Double_t                     fNTPbb;          // ntuple: rule P(tag b, true b)
      Double_t                     fNTSSB;          // ntuple: rule S/(S+B)
      Int_t                        fNTType;         // ntuple: rule type (+1->signal, -1->bkg)

      // options
      TString                      fRuleFitModuleS;// which rulefit module to use
      Bool_t                       fUseRuleFitJF;  // if true interface with J.Friedmans RuleFit module
      TString                      fRFWorkDir;     // working directory from Friedmans module
      Int_t                        fRFNrules;      // max number of rules (only Friedmans module)
      Int_t                        fRFNendnodes;   // max number of rules (only Friedmans module)
      std::vector<DecisionTree *>  fForest;        // the forest
      Int_t                        fNTrees;        // number of trees in forest
      Double_t                     fTreeEveFrac;   // fraction of events used for traing each tree
      SeparationBase              *fSepType;       // the separation used in node splitting
      Double_t                     fMinFracNEve;   // min fraction of number events
      Double_t                     fMaxFracNEve;   // ditto max
      Int_t                        fNCuts;         // grid used in cut applied in node splitting
      TString                      fSepTypeS;        // forest generation: separation type - see DecisionTree
      TString                      fPruneMethodS;    // forest generation: prune method - see DecisionTree
      TMVA::DecisionTree::EPruneMethod fPruneMethod; // forest generation: method used for pruning - see DecisionTree 
      Double_t                     fPruneStrength;   // forest generation: prune strength - see DecisionTree
      TString                      fForestTypeS;     // forest generation: how the trees are generated
      Bool_t                       fUseBoost;        // use boosted events for forest generation
      //
      Double_t                     fGDPathEveFrac; //  GD path: fraction of subsamples used for the fitting
      Double_t                     fGDValidEveFrac; // GD path: fraction of subsamples used for the fitting
      Double_t                     fGDTau;          // GD path: def threshhold fraction [0..1]
      Double_t                     fGDTauPrec;      // GD path: precision of estimated tau
      Double_t                     fGDTauMin;       // GD path: min threshhold fraction [0..1]
      Double_t                     fGDTauMax;       // GD path: max threshhold fraction [0..1]
      UInt_t                       fGDTauScan;      // GD path: number of points to scan
      Double_t                     fGDPathStep;     // GD path: step size in path
      Int_t                        fGDNPathSteps;   // GD path: number of steps
      Double_t                     fGDErrScale;     // GD path: stop 
      Double_t                     fMinimp;         // rule/linear: minimum importance
      //
      TString                      fModelTypeS;     // rule ensemble: which model (rule,linear or both)
      Double_t                     fRuleMinDist;    // rule min distance - see RuleEnsemble
      Double_t                     fLinQuantile;    // quantile cut to remove outliers - see RuleEnsemble

      ClassDef(MethodRuleFit,0)  // Friedman's RuleFit method
   };

} // namespace TMVA


//_______________________________________________________________________
template<typename T>
inline Int_t TMVA::MethodRuleFit::VerifyRange( const T& var, const T& vmin, const T& vmax )
{
   // check range and return +1 if above, -1 if below or 0 if inside
   if (var>vmax) return  1;
   if (var<vmin) return -1;
   return 0;
}

//_______________________________________________________________________
template<typename T>
inline Bool_t TMVA::MethodRuleFit::VerifyRange( TMVA::MsgLogger& mlog, const char *varstr, T& var, const T& vmin, const T& vmax )
{
   // verify range and print out message
   // if outside range, set to closest limit
   Int_t dir = TMVA::MethodRuleFit::VerifyRange(var,vmin,vmax);
   Bool_t modif=kFALSE;
   if (dir==1) {
      modif = kTRUE;
      var=vmax;
   }
   if (dir==-1) {
      modif = kTRUE;
      var=vmin;
   }
   if (modif) {
      mlog << kWARNING << "Option <" << varstr << "> " << (dir==1 ? "above":"below") << " allowed range. Reset to new value = " << var << Endl;
   }
   return modif;
}

//_______________________________________________________________________
template<typename T>
inline Bool_t TMVA::MethodRuleFit::VerifyRange( TMVA::MsgLogger& mlog, const char *varstr, T& var, const T& vmin, const T& vmax, const T& vdef )
{
   // verify range and print out message
   // if outside range, set to given default value
   Int_t dir = TMVA::MethodRuleFit::VerifyRange(var,vmin,vmax);
   Bool_t modif=kFALSE;
   if (dir!=0) {
      modif = kTRUE;
      var=vdef;
   }
   if (modif) {
      mlog << kWARNING << "Option <" << varstr << "> " << (dir==1 ? "above":"below") << " allowed range. Reset to default value = " << var << Endl;
   }
   return modif;
}


#endif // MethodRuleFit_H

MethodRuleFit.h:1

MethodRuleFit.h:2

MethodRuleFit.h:3

MethodRuleFit.h:4

MethodRuleFit.h:5

MethodRuleFit.h:6

MethodRuleFit.h:7

MethodRuleFit.h:8

MethodRuleFit.h:9

MethodRuleFit.h:10

MethodRuleFit.h:11

MethodRuleFit.h:12

MethodRuleFit.h:13

MethodRuleFit.h:14

MethodRuleFit.h:15

MethodRuleFit.h:16

MethodRuleFit.h:17

MethodRuleFit.h:18

MethodRuleFit.h:19

MethodRuleFit.h:20

MethodRuleFit.h:21

MethodRuleFit.h:22

MethodRuleFit.h:23

MethodRuleFit.h:24

MethodRuleFit.h:25

MethodRuleFit.h:26

MethodRuleFit.h:27

MethodRuleFit.h:28

MethodRuleFit.h:29

MethodRuleFit.h:30

MethodRuleFit.h:31

MethodRuleFit.h:32

MethodRuleFit.h:33

MethodRuleFit.h:34

MethodRuleFit.h:35

MethodRuleFit.h:36

MethodRuleFit.h:37

MethodRuleFit.h:38

MethodRuleFit.h:39

MethodRuleFit.h:40

MethodRuleFit.h:41

MethodRuleFit.h:42

MethodRuleFit.h:43

MethodRuleFit.h:44

MethodRuleFit.h:45

MethodRuleFit.h:46

MethodRuleFit.h:47

MethodRuleFit.h:48

MethodRuleFit.h:49

MethodRuleFit.h:50

MethodRuleFit.h:51

MethodRuleFit.h:52

MethodRuleFit.h:53

MethodRuleFit.h:54

MethodRuleFit.h:55

MethodRuleFit.h:56

MethodRuleFit.h:57

MethodRuleFit.h:58

MethodRuleFit.h:59

MethodRuleFit.h:60

MethodRuleFit.h:61

MethodRuleFit.h:62

MethodRuleFit.h:63

MethodRuleFit.h:64

MethodRuleFit.h:65

MethodRuleFit.h:66

MethodRuleFit.h:67

MethodRuleFit.h:68

MethodRuleFit.h:69

MethodRuleFit.h:70

MethodRuleFit.h:71

MethodRuleFit.h:72

MethodRuleFit.h:73

MethodRuleFit.h:74

MethodRuleFit.h:75

MethodRuleFit.h:76

MethodRuleFit.h:77

MethodRuleFit.h:78

MethodRuleFit.h:79

MethodRuleFit.h:80

MethodRuleFit.h:81

MethodRuleFit.h:82

MethodRuleFit.h:83

MethodRuleFit.h:84

MethodRuleFit.h:85

MethodRuleFit.h:86

MethodRuleFit.h:87

MethodRuleFit.h:88

MethodRuleFit.h:89

MethodRuleFit.h:90

MethodRuleFit.h:91

MethodRuleFit.h:92

MethodRuleFit.h:93

MethodRuleFit.h:94

MethodRuleFit.h:95

MethodRuleFit.h:96

MethodRuleFit.h:97

MethodRuleFit.h:98

MethodRuleFit.h:99

MethodRuleFit.h:100

MethodRuleFit.h:101

MethodRuleFit.h:102

MethodRuleFit.h:103

MethodRuleFit.h:104

MethodRuleFit.h:105

MethodRuleFit.h:106

MethodRuleFit.h:107

MethodRuleFit.h:108

MethodRuleFit.h:109

MethodRuleFit.h:110

MethodRuleFit.h:111

MethodRuleFit.h:112

MethodRuleFit.h:113

MethodRuleFit.h:114

MethodRuleFit.h:115

MethodRuleFit.h:116

MethodRuleFit.h:117

MethodRuleFit.h:118

MethodRuleFit.h:119

MethodRuleFit.h:120

MethodRuleFit.h:121

MethodRuleFit.h:122

MethodRuleFit.h:123

MethodRuleFit.h:124

MethodRuleFit.h:125

MethodRuleFit.h:126

MethodRuleFit.h:127

MethodRuleFit.h:128

MethodRuleFit.h:129

MethodRuleFit.h:130

MethodRuleFit.h:131

MethodRuleFit.h:132

MethodRuleFit.h:133

MethodRuleFit.h:134

MethodRuleFit.h:135

MethodRuleFit.h:136

MethodRuleFit.h:137

MethodRuleFit.h:138

MethodRuleFit.h:139

MethodRuleFit.h:140

MethodRuleFit.h:141

MethodRuleFit.h:142

MethodRuleFit.h:143

MethodRuleFit.h:144

MethodRuleFit.h:145

MethodRuleFit.h:146

MethodRuleFit.h:147

MethodRuleFit.h:148

MethodRuleFit.h:149

MethodRuleFit.h:150

MethodRuleFit.h:151

MethodRuleFit.h:152

MethodRuleFit.h:153

MethodRuleFit.h:154

MethodRuleFit.h:155

MethodRuleFit.h:156

MethodRuleFit.h:157

MethodRuleFit.h:158

MethodRuleFit.h:159

MethodRuleFit.h:160

MethodRuleFit.h:161

MethodRuleFit.h:162

MethodRuleFit.h:163

MethodRuleFit.h:164

MethodRuleFit.h:165

MethodRuleFit.h:166

MethodRuleFit.h:167

MethodRuleFit.h:168

MethodRuleFit.h:169

MethodRuleFit.h:170

MethodRuleFit.h:171

MethodRuleFit.h:172

MethodRuleFit.h:173

MethodRuleFit.h:174

MethodRuleFit.h:175

MethodRuleFit.h:176

MethodRuleFit.h:177

MethodRuleFit.h:178

MethodRuleFit.h:179

MethodRuleFit.h:180

MethodRuleFit.h:181

MethodRuleFit.h:182

MethodRuleFit.h:183

MethodRuleFit.h:184

MethodRuleFit.h:185

MethodRuleFit.h:186

MethodRuleFit.h:187

MethodRuleFit.h:188

MethodRuleFit.h:189

MethodRuleFit.h:190

MethodRuleFit.h:191

MethodRuleFit.h:192

MethodRuleFit.h:193

MethodRuleFit.h:194

MethodRuleFit.h:195

MethodRuleFit.h:196

MethodRuleFit.h:197

MethodRuleFit.h:198

MethodRuleFit.h:199

MethodRuleFit.h:200

MethodRuleFit.h:201

MethodRuleFit.h:202

MethodRuleFit.h:203

MethodRuleFit.h:204

MethodRuleFit.h:205

MethodRuleFit.h:206

MethodRuleFit.h:207

MethodRuleFit.h:208

MethodRuleFit.h:209

MethodRuleFit.h:210

MethodRuleFit.h:211

MethodRuleFit.h:212

MethodRuleFit.h:213

MethodRuleFit.h:214

MethodRuleFit.h:215

MethodRuleFit.h:216

MethodRuleFit.h:217

MethodRuleFit.h:218

MethodRuleFit.h:219

MethodRuleFit.h:220

MethodRuleFit.h:221

MethodRuleFit.h:222

MethodRuleFit.h:223

MethodRuleFit.h:224

MethodRuleFit.h:225

MethodRuleFit.h:226

MethodRuleFit.h:227

MethodRuleFit.h:228

MethodRuleFit.h:229

MethodRuleFit.h:230

MethodRuleFit.h:231

MethodRuleFit.h:232

MethodRuleFit.h:233

MethodRuleFit.h:234

MethodRuleFit.h:235

MethodRuleFit.h:236

MethodRuleFit.h:237

MethodRuleFit.h:238

MethodRuleFit.h:239

MethodRuleFit.h:240

MethodRuleFit.h:241

MethodRuleFit.h:242

MethodRuleFit.h:243

MethodRuleFit.h:244

MethodRuleFit.h:245

MethodRuleFit.h:246

MethodRuleFit.h:247

MethodRuleFit.h:248

MethodRuleFit.h:249

MethodRuleFit.h:250

MethodRuleFit.h:251

MethodRuleFit.h:252

MethodRuleFit.h:253

MethodRuleFit.h:254

MethodRuleFit.h:255

MethodRuleFit.h:256

MethodRuleFit.h:257

MethodRuleFit.h:258

MethodRuleFit.h:259

MethodRuleFit.h:260

MethodRuleFit.h:261

MethodRuleFit.h:262

MethodRuleFit.h:263

MethodRuleFit.h:264

MethodRuleFit.h:265

MethodRuleFit.h:266

MethodRuleFit.h:267

MethodRuleFit.h:268

MethodRuleFit.h:269

MethodRuleFit.h:270

MethodRuleFit.h:271

MethodRuleFit.h:272

MethodRuleFit.h:273

MethodRuleFit.h:274

MethodRuleFit.h:275

MethodRuleFit.h:276

MethodRuleFit.h:277

MethodRuleFit.h:278