ROOT logo
// @(#)root/tmva $Id: MethodRuleFit.h 39395 2011-05-26 10:05:54Z moneta $
// Author: Fredrik Tegenfeldt

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : MethodRuleFit                                                         *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Friedman's RuleFit method                                                 * 
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         * 
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 * 
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 *                                                                                *
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodRuleFit
#define ROOT_TMVA_MethodRuleFit

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodRuleFit                                                        //
//                                                                      //
// J Friedman's RuleFit method                                          //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#ifndef ROOT_TMVA_MethodBase
#include "TMVA/MethodBase.h"
#endif
#ifndef ROOT_TMatrixDfwd
#include "TMatrixDfwd.h"
#endif
#ifndef ROOT_TVectorD
#include "TVectorD.h"
#endif
#ifndef ROOT_TMVA_DecisionTree
#include "TMVA/DecisionTree.h"
#endif
#ifndef ROOT_TMVA_RuleFit
#include "TMVA/RuleFit.h"
#endif

namespace TMVA {

   class SeparationBase;

   class MethodRuleFit : public MethodBase {

   public:

      MethodRuleFit( const TString& jobName,
                     const TString& methodTitle, 
                     DataSetInfo& theData,
                     const TString& theOption = "",
                     TDirectory* theTargetDir = 0 );

      MethodRuleFit( DataSetInfo& theData,
                     const TString& theWeightFile,
                     TDirectory* theTargetDir = NULL );

      virtual ~MethodRuleFit( void );

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ );

      // training method
      void Train( void );

      using MethodBase::ReadWeightsFromStream;

      // write weights to file
      void AddWeightsXMLTo     ( void* parent ) const;

      // read weights from file
      void ReadWeightsFromStream( istream& istr );
      void ReadWeightsFromXML   ( void* wghtnode );

      // calculate the MVA value
      Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );

      // write method specific histos to target file
      void WriteMonitoringHistosToFile( void ) const;

      // ranking of input variables
      const Ranking* CreateRanking();

      Bool_t                                   UseBoost()           const   { return fUseBoost; }

      // accessors
      RuleFit*                                 GetRuleFitPtr()              { return &fRuleFit; }
      const RuleFit*                           GetRuleFitConstPtr() const   { return &fRuleFit; }
      TDirectory*                              GetMethodBaseDir()   const   { return BaseDir(); }
      const std::vector<TMVA::Event*>&         GetTrainingEvents()  const   { return fEventSample; }
      const std::vector<TMVA::DecisionTree*>&  GetForest()          const   { return fForest; }
      Int_t                                    GetNTrees()          const   { return fNTrees; }
      Double_t                                 GetTreeEveFrac()     const   { return fTreeEveFrac; }
      const SeparationBase*                    GetSeparationBaseConst() const { return fSepType; }
      SeparationBase*                          GetSeparationBase()  const   { return fSepType; }
      TMVA::DecisionTree::EPruneMethod         GetPruneMethod()     const   { return fPruneMethod; }
      Double_t                                 GetPruneStrength()   const   { return fPruneStrength; }
      Double_t                                 GetMinFracNEve()     const   { return fMinFracNEve; }
      Double_t                                 GetMaxFracNEve()     const   { return fMaxFracNEve; }
      Int_t                                    GetNCuts()           const   { return fNCuts; }
      //
      Int_t                                    GetGDNPathSteps()    const   { return fGDNPathSteps; }
      Double_t                                 GetGDPathStep()      const   { return fGDPathStep; }
      Double_t                                 GetGDErrScale()      const   { return fGDErrScale; }
      Double_t                                 GetGDPathEveFrac()   const   { return fGDPathEveFrac; }
      Double_t                                 GetGDValidEveFrac()  const   { return fGDValidEveFrac; }
      //
      Double_t                                 GetLinQuantile()     const   { return fLinQuantile; }

      const TString                            GetRFWorkDir()       const   { return fRFWorkDir; }
      Int_t                                    GetRFNrules()        const   { return fRFNrules; }
      Int_t                                    GetRFNendnodes()     const   { return fRFNendnodes; }

   protected:

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      void MakeClassRuleCuts( std::ostream& ) const;

      void MakeClassLinear( std::ostream& ) const;

      // get help message text
      void GetHelpMessage() const;

      // initialize rulefit
      void Init( void );

      // copy all training events into a stl::vector
      void InitEventSample( void );

      // initialize monitor ntuple
      void InitMonitorNtuple();

      void TrainTMVARuleFit();
      void TrainJFRuleFit();

   private:

      // check variable range and set var to lower or upper if out of range
      template<typename T>
      inline Bool_t VerifyRange( MsgLogger& mlog, const char *varstr, T& var, const T& vmin, const T& vmax );

      template<typename T>
      inline Bool_t VerifyRange( MsgLogger& mlog, const char *varstr, T& var, const T& vmin, const T& vmax, const T& vdef );

      template<typename T>
      inline Int_t VerifyRange( const T& var, const T& vmin, const T& vmax );

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();

      RuleFit                      fRuleFit;        // RuleFit instance
      std::vector<TMVA::Event *>   fEventSample;    // the complete training sample
      Double_t                     fSignalFraction; // scalefactor for bkg events to modify initial s/b fraction in training data

      // ntuple
      TTree                       *fMonitorNtuple;  // pointer to monitor rule ntuple
      Double_t                     fNTImportance;   // ntuple: rule importance
      Double_t                     fNTCoefficient;  // ntuple: rule coefficient
      Double_t                     fNTSupport;      // ntuple: rule support
      Int_t                        fNTNcuts;        // ntuple: rule number of cuts
      Int_t                        fNTNvars;        // ntuple: rule number of vars
      Double_t                     fNTPtag;         // ntuple: rule P(tag)
      Double_t                     fNTPss;          // ntuple: rule P(tag s, true s)
      Double_t                     fNTPsb;          // ntuple: rule P(tag s, true b)
      Double_t                     fNTPbs;          // ntuple: rule P(tag b, true s)
      Double_t                     fNTPbb;          // ntuple: rule P(tag b, true b)
      Double_t                     fNTSSB;          // ntuple: rule S/(S+B)
      Int_t                        fNTType;         // ntuple: rule type (+1->signal, -1->bkg)

      // options
      TString                      fRuleFitModuleS;// which rulefit module to use
      Bool_t                       fUseRuleFitJF;  // if true interface with J.Friedmans RuleFit module
      TString                      fRFWorkDir;     // working directory from Friedmans module
      Int_t                        fRFNrules;      // max number of rules (only Friedmans module)
      Int_t                        fRFNendnodes;   // max number of rules (only Friedmans module)
      std::vector<DecisionTree *>  fForest;        // the forest
      Int_t                        fNTrees;        // number of trees in forest
      Double_t                     fTreeEveFrac;   // fraction of events used for traing each tree
      SeparationBase              *fSepType;       // the separation used in node splitting
      Double_t                     fMinFracNEve;   // min fraction of number events
      Double_t                     fMaxFracNEve;   // ditto max
      Int_t                        fNCuts;         // grid used in cut applied in node splitting
      TString                      fSepTypeS;        // forest generation: separation type - see DecisionTree
      TString                      fPruneMethodS;    // forest generation: prune method - see DecisionTree
      TMVA::DecisionTree::EPruneMethod fPruneMethod; // forest generation: method used for pruning - see DecisionTree 
      Double_t                     fPruneStrength;   // forest generation: prune strength - see DecisionTree
      TString                      fForestTypeS;     // forest generation: how the trees are generated
      Bool_t                       fUseBoost;        // use boosted events for forest generation
      //
      Double_t                     fGDPathEveFrac; //  GD path: fraction of subsamples used for the fitting
      Double_t                     fGDValidEveFrac; // GD path: fraction of subsamples used for the fitting
      Double_t                     fGDTau;          // GD path: def threshhold fraction [0..1]
      Double_t                     fGDTauPrec;      // GD path: precision of estimated tau
      Double_t                     fGDTauMin;       // GD path: min threshhold fraction [0..1]
      Double_t                     fGDTauMax;       // GD path: max threshhold fraction [0..1]
      UInt_t                       fGDTauScan;      // GD path: number of points to scan
      Double_t                     fGDPathStep;     // GD path: step size in path
      Int_t                        fGDNPathSteps;   // GD path: number of steps
      Double_t                     fGDErrScale;     // GD path: stop 
      Double_t                     fMinimp;         // rule/linear: minimum importance
      //
      TString                      fModelTypeS;     // rule ensemble: which model (rule,linear or both)
      Double_t                     fRuleMinDist;    // rule min distance - see RuleEnsemble
      Double_t                     fLinQuantile;    // quantile cut to remove outliers - see RuleEnsemble

      ClassDef(MethodRuleFit,0)  // Friedman's RuleFit method
   };

} // namespace TMVA


//_______________________________________________________________________
template<typename T>
inline Int_t TMVA::MethodRuleFit::VerifyRange( const T& var, const T& vmin, const T& vmax )
{
   // check range and return +1 if above, -1 if below or 0 if inside
   if (var>vmax) return  1;
   if (var<vmin) return -1;
   return 0;
}

//_______________________________________________________________________
template<typename T>
inline Bool_t TMVA::MethodRuleFit::VerifyRange( TMVA::MsgLogger& mlog, const char *varstr, T& var, const T& vmin, const T& vmax )
{
   // verify range and print out message
   // if outside range, set to closest limit
   Int_t dir = TMVA::MethodRuleFit::VerifyRange(var,vmin,vmax);
   Bool_t modif=kFALSE;
   if (dir==1) {
      modif = kTRUE;
      var=vmax;
   }
   if (dir==-1) {
      modif = kTRUE;
      var=vmin;
   }
   if (modif) {
      mlog << kWARNING << "Option <" << varstr << "> " << (dir==1 ? "above":"below") << " allowed range. Reset to new value = " << var << Endl;
   }
   return modif;
}

//_______________________________________________________________________
template<typename T>
inline Bool_t TMVA::MethodRuleFit::VerifyRange( TMVA::MsgLogger& mlog, const char *varstr, T& var, const T& vmin, const T& vmax, const T& vdef )
{
   // verify range and print out message
   // if outside range, set to given default value
   Int_t dir = TMVA::MethodRuleFit::VerifyRange(var,vmin,vmax);
   Bool_t modif=kFALSE;
   if (dir!=0) {
      modif = kTRUE;
      var=vdef;
   }
   if (modif) {
      mlog << kWARNING << "Option <" << varstr << "> " << (dir==1 ? "above":"below") << " allowed range. Reset to default value = " << var << Endl;
   }
   return modif;
}


#endif // MethodRuleFit_H
 MethodRuleFit.h:1
 MethodRuleFit.h:2
 MethodRuleFit.h:3
 MethodRuleFit.h:4
 MethodRuleFit.h:5
 MethodRuleFit.h:6
 MethodRuleFit.h:7
 MethodRuleFit.h:8
 MethodRuleFit.h:9
 MethodRuleFit.h:10
 MethodRuleFit.h:11
 MethodRuleFit.h:12
 MethodRuleFit.h:13
 MethodRuleFit.h:14
 MethodRuleFit.h:15
 MethodRuleFit.h:16
 MethodRuleFit.h:17
 MethodRuleFit.h:18
 MethodRuleFit.h:19
 MethodRuleFit.h:20
 MethodRuleFit.h:21
 MethodRuleFit.h:22
 MethodRuleFit.h:23
 MethodRuleFit.h:24
 MethodRuleFit.h:25
 MethodRuleFit.h:26
 MethodRuleFit.h:27
 MethodRuleFit.h:28
 MethodRuleFit.h:29
 MethodRuleFit.h:30
 MethodRuleFit.h:31
 MethodRuleFit.h:32
 MethodRuleFit.h:33
 MethodRuleFit.h:34
 MethodRuleFit.h:35
 MethodRuleFit.h:36
 MethodRuleFit.h:37
 MethodRuleFit.h:38
 MethodRuleFit.h:39
 MethodRuleFit.h:40
 MethodRuleFit.h:41
 MethodRuleFit.h:42
 MethodRuleFit.h:43
 MethodRuleFit.h:44
 MethodRuleFit.h:45
 MethodRuleFit.h:46
 MethodRuleFit.h:47
 MethodRuleFit.h:48
 MethodRuleFit.h:49
 MethodRuleFit.h:50
 MethodRuleFit.h:51
 MethodRuleFit.h:52
 MethodRuleFit.h:53
 MethodRuleFit.h:54
 MethodRuleFit.h:55
 MethodRuleFit.h:56
 MethodRuleFit.h:57
 MethodRuleFit.h:58
 MethodRuleFit.h:59
 MethodRuleFit.h:60
 MethodRuleFit.h:61
 MethodRuleFit.h:62
 MethodRuleFit.h:63
 MethodRuleFit.h:64
 MethodRuleFit.h:65
 MethodRuleFit.h:66
 MethodRuleFit.h:67
 MethodRuleFit.h:68
 MethodRuleFit.h:69
 MethodRuleFit.h:70
 MethodRuleFit.h:71
 MethodRuleFit.h:72
 MethodRuleFit.h:73
 MethodRuleFit.h:74
 MethodRuleFit.h:75
 MethodRuleFit.h:76
 MethodRuleFit.h:77
 MethodRuleFit.h:78
 MethodRuleFit.h:79
 MethodRuleFit.h:80
 MethodRuleFit.h:81
 MethodRuleFit.h:82
 MethodRuleFit.h:83
 MethodRuleFit.h:84
 MethodRuleFit.h:85
 MethodRuleFit.h:86
 MethodRuleFit.h:87
 MethodRuleFit.h:88
 MethodRuleFit.h:89
 MethodRuleFit.h:90
 MethodRuleFit.h:91
 MethodRuleFit.h:92
 MethodRuleFit.h:93
 MethodRuleFit.h:94
 MethodRuleFit.h:95
 MethodRuleFit.h:96
 MethodRuleFit.h:97
 MethodRuleFit.h:98
 MethodRuleFit.h:99
 MethodRuleFit.h:100
 MethodRuleFit.h:101
 MethodRuleFit.h:102
 MethodRuleFit.h:103
 MethodRuleFit.h:104
 MethodRuleFit.h:105
 MethodRuleFit.h:106
 MethodRuleFit.h:107
 MethodRuleFit.h:108
 MethodRuleFit.h:109
 MethodRuleFit.h:110
 MethodRuleFit.h:111
 MethodRuleFit.h:112
 MethodRuleFit.h:113
 MethodRuleFit.h:114
 MethodRuleFit.h:115
 MethodRuleFit.h:116
 MethodRuleFit.h:117
 MethodRuleFit.h:118
 MethodRuleFit.h:119
 MethodRuleFit.h:120
 MethodRuleFit.h:121
 MethodRuleFit.h:122
 MethodRuleFit.h:123
 MethodRuleFit.h:124
 MethodRuleFit.h:125
 MethodRuleFit.h:126
 MethodRuleFit.h:127
 MethodRuleFit.h:128
 MethodRuleFit.h:129
 MethodRuleFit.h:130
 MethodRuleFit.h:131
 MethodRuleFit.h:132
 MethodRuleFit.h:133
 MethodRuleFit.h:134
 MethodRuleFit.h:135
 MethodRuleFit.h:136
 MethodRuleFit.h:137
 MethodRuleFit.h:138
 MethodRuleFit.h:139
 MethodRuleFit.h:140
 MethodRuleFit.h:141
 MethodRuleFit.h:142
 MethodRuleFit.h:143
 MethodRuleFit.h:144
 MethodRuleFit.h:145
 MethodRuleFit.h:146
 MethodRuleFit.h:147
 MethodRuleFit.h:148
 MethodRuleFit.h:149
 MethodRuleFit.h:150
 MethodRuleFit.h:151
 MethodRuleFit.h:152
 MethodRuleFit.h:153
 MethodRuleFit.h:154
 MethodRuleFit.h:155
 MethodRuleFit.h:156
 MethodRuleFit.h:157
 MethodRuleFit.h:158
 MethodRuleFit.h:159
 MethodRuleFit.h:160
 MethodRuleFit.h:161
 MethodRuleFit.h:162
 MethodRuleFit.h:163
 MethodRuleFit.h:164
 MethodRuleFit.h:165
 MethodRuleFit.h:166
 MethodRuleFit.h:167
 MethodRuleFit.h:168
 MethodRuleFit.h:169
 MethodRuleFit.h:170
 MethodRuleFit.h:171
 MethodRuleFit.h:172
 MethodRuleFit.h:173
 MethodRuleFit.h:174
 MethodRuleFit.h:175
 MethodRuleFit.h:176
 MethodRuleFit.h:177
 MethodRuleFit.h:178
 MethodRuleFit.h:179
 MethodRuleFit.h:180
 MethodRuleFit.h:181
 MethodRuleFit.h:182
 MethodRuleFit.h:183
 MethodRuleFit.h:184
 MethodRuleFit.h:185
 MethodRuleFit.h:186
 MethodRuleFit.h:187
 MethodRuleFit.h:188
 MethodRuleFit.h:189
 MethodRuleFit.h:190
 MethodRuleFit.h:191
 MethodRuleFit.h:192
 MethodRuleFit.h:193
 MethodRuleFit.h:194
 MethodRuleFit.h:195
 MethodRuleFit.h:196
 MethodRuleFit.h:197
 MethodRuleFit.h:198
 MethodRuleFit.h:199
 MethodRuleFit.h:200
 MethodRuleFit.h:201
 MethodRuleFit.h:202
 MethodRuleFit.h:203
 MethodRuleFit.h:204
 MethodRuleFit.h:205
 MethodRuleFit.h:206
 MethodRuleFit.h:207
 MethodRuleFit.h:208
 MethodRuleFit.h:209
 MethodRuleFit.h:210
 MethodRuleFit.h:211
 MethodRuleFit.h:212
 MethodRuleFit.h:213
 MethodRuleFit.h:214
 MethodRuleFit.h:215
 MethodRuleFit.h:216
 MethodRuleFit.h:217
 MethodRuleFit.h:218
 MethodRuleFit.h:219
 MethodRuleFit.h:220
 MethodRuleFit.h:221
 MethodRuleFit.h:222
 MethodRuleFit.h:223
 MethodRuleFit.h:224
 MethodRuleFit.h:225
 MethodRuleFit.h:226
 MethodRuleFit.h:227
 MethodRuleFit.h:228
 MethodRuleFit.h:229
 MethodRuleFit.h:230
 MethodRuleFit.h:231
 MethodRuleFit.h:232
 MethodRuleFit.h:233
 MethodRuleFit.h:234
 MethodRuleFit.h:235
 MethodRuleFit.h:236
 MethodRuleFit.h:237
 MethodRuleFit.h:238
 MethodRuleFit.h:239
 MethodRuleFit.h:240
 MethodRuleFit.h:241
 MethodRuleFit.h:242
 MethodRuleFit.h:243
 MethodRuleFit.h:244
 MethodRuleFit.h:245
 MethodRuleFit.h:246
 MethodRuleFit.h:247
 MethodRuleFit.h:248
 MethodRuleFit.h:249
 MethodRuleFit.h:250
 MethodRuleFit.h:251
 MethodRuleFit.h:252
 MethodRuleFit.h:253
 MethodRuleFit.h:254
 MethodRuleFit.h:255
 MethodRuleFit.h:256
 MethodRuleFit.h:257
 MethodRuleFit.h:258
 MethodRuleFit.h:259
 MethodRuleFit.h:260
 MethodRuleFit.h:261
 MethodRuleFit.h:262
 MethodRuleFit.h:263
 MethodRuleFit.h:264
 MethodRuleFit.h:265
 MethodRuleFit.h:266
 MethodRuleFit.h:267
 MethodRuleFit.h:268
 MethodRuleFit.h:269
 MethodRuleFit.h:270
 MethodRuleFit.h:271
 MethodRuleFit.h:272
 MethodRuleFit.h:273
 MethodRuleFit.h:274
 MethodRuleFit.h:275
 MethodRuleFit.h:276
 MethodRuleFit.h:277
 MethodRuleFit.h:278