Logo ROOT  
Reference Guide
RuleFitAPI.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleFitAPI *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Interface to Friedman's RuleFit method *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
16 * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
17 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * U. of Victoria, Canada *
22 * MPI-KP Heidelberg, Germany *
23 * LAPP, Annecy, France *
24 * *
25 * Redistribution and use in source and binary forms, with or without *
26 * modification, are permitted according to the terms listed in LICENSE *
27 * *
28 **********************************************************************************/
29
30#ifndef ROOT_TMVA_RuleFitAPI
31#define ROOT_TMVA_RuleFitAPI
32
33//////////////////////////////////////////////////////////////////////////
34// //
35// RuleFitAPI //
36// //
37// J Friedman's RuleFit method //
38// //
39//////////////////////////////////////////////////////////////////////////
40
41#include <fstream>
42
43#include "TMVA/MsgLogger.h"
44
45namespace TMVA {
46
47 class MethodRuleFit;
48 class RuleFit;
49
50 class RuleFitAPI {
51
52 public:
53
54 RuleFitAPI( const TMVA::MethodRuleFit *rfbase, TMVA::RuleFit *rulefit, EMsgType minType );
55
56 virtual ~RuleFitAPI();
57
58 // welcome message
59 void WelcomeMessage();
60
61 // message on how to get the binary
62 void HowtoSetupRF();
63
64 // Set RuleFit working directory
65 void SetRFWorkDir(const char * wdir);
66
67 // Check RF work dir - aborts if it fails
68 void CheckRFWorkDir();
69
70 // run rf_go.exe in various modes
71 inline void TrainRuleFit();
72 inline void TestRuleFit();
73 inline void VarImp();
74
75 // read result into MethodRuleFit
77
78 // Get working directory
79 const TString GetRFWorkDir() const { return fRFWorkDir; }
80
81 protected:
82
83 enum ERFMode { kRfRegress=1, kRfClass=2 }; // RuleFit modes, default=Class
84 enum EModel { kRfLinear=0, kRfRules=1, kRfBoth=2 }; // models, default=Both (rules+linear)
85 enum ERFProgram { kRfTrain=0, kRfPredict, kRfVarimp }; // rf_go.exe running mode
86
87 // integer parameters
88 typedef struct {
101 } IntParms;
102
103 // float parameters
104 typedef struct {
113 } RealParms;
114
115 // setup
116 void InitRuleFit();
117 void FillRealParmsDef();
118 void FillIntParmsDef();
119 void ImportSetup();
120 void SetTrainParms();
121 void SetTestParms();
122
123 // run
125
126 // set rf_go.exe running mode
130
131 // handle rulefit files
133 inline Bool_t OpenRFile(TString name, std::ofstream & f);
134 inline Bool_t OpenRFile(TString name, std::ifstream & f);
135
136 // read/write binary files
137 inline Bool_t WriteInt(std::ofstream & f, const Int_t *v, Int_t n=1);
138 inline Bool_t WriteFloat(std::ofstream & f, const Float_t *v, Int_t n=1);
139 inline Int_t ReadInt(std::ifstream & f, Int_t *v, Int_t n=1) const;
140 inline Int_t ReadFloat(std::ifstream & f, Float_t *v, Int_t n=1) const;
141
142 // write rf_go.exe i/o files
146 Bool_t WriteLx();
158
159 // read rf_go.exe i/o files
175
176 private:
177 // prevent empty constructor from being used
179 const MethodRuleFit *fMethodRuleFit; // parent method - set in constructor
180 RuleFit *fRuleFit; // non const ptr to RuleFit class in MethodRuleFit
181 //
182 std::vector<Float_t> fRFYhat; // score results from test sample
183 std::vector<Float_t> fRFVarImp; // variable importances
184 std::vector<Int_t> fRFVarImpInd; // variable index
185 TString fRFWorkDir; // working directory
186 IntParms fRFIntParms; // integer parameters
187 RealParms fRFRealParms; // real parameters
188 std::vector<int> fRFLx; // variable selector
189 ERFProgram fRFProgram; // what to run
190 TString fModelType; // model type string
191
192 mutable MsgLogger fLogger; // message logger
193
194 ClassDef(RuleFitAPI,0); // Friedman's RuleFit method
195
196 };
197
198} // namespace TMVA
199
200//_______________________________________________________________________
202{
203 // run rf_go.exe to train the model
205 WriteAll();
206 RunRuleFit();
207}
208
209//_______________________________________________________________________
211{
212 // run rf_go.exe with the test data
213 SetTestParms();
214 WriteAll();
215 RunRuleFit();
216 ReadYhat(); // read in the scores
217}
218
219//_______________________________________________________________________
221{
222 // run rf_go.exe to get the variable importance
223 SetRFVarimp();
224 WriteAll();
225 RunRuleFit();
226 ReadVarImp(); // read in the variable importances
227}
228
229//_______________________________________________________________________
231{
232 // get the name including the rulefit directory
233 return fRFWorkDir+"/"+name;
234}
235
236//_______________________________________________________________________
238{
239 // open a file for writing in the rulefit directory
240 TString fullName = GetRFName(name);
241 f.open(fullName);
242 if (!f.is_open()) {
243 fLogger << kERROR << "Error opening RuleFit file for output: "
244 << fullName << Endl;
245 return kFALSE;
246 }
247 return kTRUE;
248}
249
250//_______________________________________________________________________
252{
253 // open a file for reading in the rulefit directory
254 TString fullName = GetRFName(name);
255 f.open(fullName);
256 if (!f.is_open()) {
257 fLogger << kERROR << "Error opening RuleFit file for input: "
258 << fullName << Endl;
259 return kFALSE;
260 }
261 return kTRUE;
262}
263
264//_______________________________________________________________________
266{
267 // write an int
268 if (!f.is_open()) return kFALSE;
269 return (Bool_t)f.write(reinterpret_cast<char const *>(v), n*sizeof(Int_t));
270}
271
272//_______________________________________________________________________
274{
275 // write a float
276 if (!f.is_open()) return kFALSE;
277 return (Bool_t)f.write(reinterpret_cast<char const *>(v), n*sizeof(Float_t));
278}
279
280//_______________________________________________________________________
281Int_t TMVA::RuleFitAPI::ReadInt(std::ifstream & f, Int_t *v, Int_t n) const
282{
283 // read an int
284 if (!f.is_open()) return 0;
285 if (f.read(reinterpret_cast<char *>(v), n*sizeof(Int_t))) return 1;
286 return 0;
287}
288
289//_______________________________________________________________________
291{
292 // read a float
293 if (!f.is_open()) return 0;
294 if (f.read(reinterpret_cast<char *>(v), n*sizeof(Float_t))) return 1;
295 return 0;
296}
297
298#endif // ROOT_TMVA_RuleFitAPI
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
float Float_t
Definition: RtypesCore.h:53
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassDef(name, id)
Definition: Rtypes.h:326
char name[80]
Definition: TGX11.cxx:109
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:47
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
J Friedman's RuleFit method.
Definition: RuleFitAPI.h:50
void SetTestParms()
set the test params
Definition: RuleFitAPI.cxx:203
Bool_t ReadRfStatus()
TString fModelType
Definition: RuleFitAPI.h:190
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Definition: RuleFitAPI.cxx:376
Bool_t WriteYhat()
written by rf_go.exe
Definition: RuleFitAPI.cxx:475
Int_t ReadFloat(std::ifstream &f, Float_t *v, Int_t n=1) const
Definition: RuleFitAPI.h:290
Bool_t WriteAll()
write all files read by rf_go.exe
Definition: RuleFitAPI.cxx:250
void ImportSetup()
import setup from MethodRuleFit
Definition: RuleFitAPI.cxx:134
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Definition: RuleFitAPI.cxx:358
Bool_t WriteIntParms()
write int params file
Definition: RuleFitAPI.cxx:266
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
Definition: RuleFitAPI.cxx:168
Bool_t WriteProgram()
write command to rf_go.exe
Definition: RuleFitAPI.cxx:307
Bool_t ReadModelSum()
read model from rulefit.sum
Definition: RuleFitAPI.cxx:546
Bool_t WriteVarImp()
Definition: RuleFitAPI.cxx:464
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Definition: RuleFitAPI.cxx:157
Bool_t ReadVarImp()
read variable importance
Definition: RuleFitAPI.cxx:509
Bool_t ReadTrainW()
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE: format unknown!)
Definition: RuleFitAPI.cxx:367
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
Definition: RuleFitAPI.cxx:349
Bool_t ReadRealVarImp()
void TestRuleFit()
Definition: RuleFitAPI.h:210
TString GetRFName(TString name)
Definition: RuleFitAPI.h:230
RealParms fRFRealParms
Definition: RuleFitAPI.h:187
RuleFit * fRuleFit
Definition: RuleFitAPI.h:180
Bool_t OpenRFile(TString name, std::ofstream &f)
Definition: RuleFitAPI.h:237
Bool_t ReadIntParms()
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
Definition: RuleFitAPI.cxx:124
void FillRealParmsDef()
set default real params
Definition: RuleFitAPI.cxx:215
std::vector< Float_t > fRFVarImp
Definition: RuleFitAPI.h:183
Bool_t WriteVarNames()
write variable names, ascii
Definition: RuleFitAPI.cxx:452
ERFProgram fRFProgram
Definition: RuleFitAPI.h:189
Bool_t WriteRealVarImp()
write the minimum importance to be considered
Definition: RuleFitAPI.cxx:335
void FillIntParmsDef()
set default int params
Definition: RuleFitAPI.cxx:230
void WelcomeMessage()
welcome message
Definition: RuleFitAPI.cxx:78
void TrainRuleFit()
Definition: RuleFitAPI.h:201
Bool_t WriteTrain()
write training data, column wise
Definition: RuleFitAPI.cxx:385
virtual ~RuleFitAPI()
destructor
Definition: RuleFitAPI.cxx:71
Bool_t ReadRfOut()
Bool_t WriteRealParms()
write real params file
Definition: RuleFitAPI.cxx:277
Bool_t WriteFloat(std::ofstream &f, const Float_t *v, Int_t n=1)
Definition: RuleFitAPI.h:273
Bool_t ReadProgram()
MsgLogger fLogger
Definition: RuleFitAPI.h:192
Bool_t ReadRuleFitMod()
const MethodRuleFit * fMethodRuleFit
Definition: RuleFitAPI.h:179
Int_t ReadInt(std::ifstream &f, Int_t *v, Int_t n=1) const
Definition: RuleFitAPI.h:281
IntParms fRFIntParms
Definition: RuleFitAPI.h:186
Bool_t WriteLx()
Save input variable mask.
Definition: RuleFitAPI.cxx:293
TString fRFWorkDir
Definition: RuleFitAPI.h:185
Bool_t ReadYhat()
read the score
Definition: RuleFitAPI.cxx:484
void HowtoSetupRF()
howto message
Definition: RuleFitAPI.cxx:94
Bool_t ReadTrainX()
std::vector< Float_t > fRFYhat
Definition: RuleFitAPI.h:182
std::vector< Int_t > fRFVarImpInd
Definition: RuleFitAPI.h:184
Bool_t ReadVarNames()
std::vector< int > fRFLx
Definition: RuleFitAPI.h:188
Bool_t WriteTest()
Write test data.
Definition: RuleFitAPI.cxx:420
void SetRFPredict()
Definition: RuleFitAPI.h:128
void SetTrainParms()
set the training parameters
Definition: RuleFitAPI.cxx:190
void SetRFVarimp()
Definition: RuleFitAPI.h:129
Bool_t WriteInt(std::ofstream &f, const Int_t *v, Int_t n=1)
Definition: RuleFitAPI.h:265
const TString GetRFWorkDir() const
Definition: RuleFitAPI.h:79
Bool_t ReadTrainY()
Bool_t ReadRealParms()
Int_t RunRuleFit()
execute rf_go.exe
Definition: RuleFitAPI.cxx:774
Bool_t ReadRuleFitSum()
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:45
Basic string class.
Definition: TString.h:131
const Int_t n
Definition: legend1.C:16
create variable transformations
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158