Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RuleFitAPI.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleFitAPI *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header file for description) *
12 * *
13 * Authors (alphabetical): *
14 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15 * *
16 * Copyright (c) 2005: *
17 * CERN, Switzerland *
18 * Iowa State U. *
19 * MPI-KP Heidelberg, Germany *
20 * *
21 * Redistribution and use in source and binary forms, with or without *
22 * modification, are permitted according to the terms listed in LICENSE *
23 * (http://tmva.sourceforge.net/LICENSE) *
24 **********************************************************************************/
25
26/*! \class TMVA::RuleFitAPI
27\ingroup TMVA
28J Friedman's RuleFit method
29*/
30
31#include "TMVA/RuleFitAPI.h"
32
33#include "TMVA/DataSet.h"
34#include "TMVA/DataSetInfo.h"
35#include "TMVA/MethodRuleFit.h"
36#include "TMVA/RuleFit.h"
37#include "TMVA/Timer.h"
38#include "TMVA/Tools.h"
39#include "TMVA/Types.h"
40#include "TMVA/VariableInfo.h"
41
42#include "TSystem.h"
43
44#include <algorithm>
45
47
49 RuleFit *rulefit,
50 EMsgType minType = kINFO ) :
51fMethodRuleFit(rfbase),
52 fRuleFit(rulefit),
53 fRFProgram(kRfTrain),
54 fLogger("RuleFitAPI",minType)
55{
56 // standard constructor
57 if (rfbase) {
58 SetRFWorkDir(rfbase->GetRFWorkDir());
59 } else {
60 SetRFWorkDir("./rulefit");
61 }
63}
64
65
66////////////////////////////////////////////////////////////////////////////////
67/// destructor
68
70{
71}
72
73////////////////////////////////////////////////////////////////////////////////
74/// welcome message
75
77{
78 fLogger << kINFO
79 << "\n"
80 << "---------------------------------------------------------------------------\n"
81 << "- You are running the interface to Jerome Friedmans RuleFit(tm) code. -\n"
82 << "- For a full manual see the following web page: -\n"
83 << "- -\n"
84 << "- http://www-stat.stanford.edu/~jhf/R-RuleFit.html -\n"
85 << "- -\n"
86 << "---------------------------------------------------------------------------"
87 << Endl;
88}
89////////////////////////////////////////////////////////////////////////////////
90/// howto message
91
93{
94 fLogger << kINFO
95 << "\n"
96 << "------------------------ RULEFIT-JF INTERFACE SETUP -----------------------\n"
97 << "\n"
98 << "1. Create a rulefit directory in your current work directory:\n"
99 << " mkdir " << fRFWorkDir << "\n\n"
100 << " the directory may be set using the option RuleFitDir\n"
101 << "\n"
102 << "2. Copy (or make a link) the file rf_go.exe into this directory\n"
103 << "\n"
104 << "The file can be obtained from Jerome Friedmans homepage (linux):\n"
105 << " wget http://www-stat.stanford.edu/~jhf/r-rulefit/linux/rf_go.exe\n"
106 << "\n"
107 << "Don't forget to do:\n"
108 << " chmod +x rf_go.exe\n"
109 << "\n"
110 << "For Windows download:\n"
111 << " http://www-stat.stanford.edu/~jhf/r-rulefit/windows/rf_go.exe\n"
112 << "\n"
113 << "NOTE: other platforms are not supported (see Friedmans homepage)\n"
114 << "\n"
115 << "---------------------------------------------------------------------------\n"
116 << Endl;
117}
118////////////////////////////////////////////////////////////////////////////////
119/// default initialisation
120/// SetRFWorkDir("./rulefit");
121
123{
124 CheckRFWorkDir();
125 FillIntParmsDef();
126 FillRealParmsDef();
127}
128
129////////////////////////////////////////////////////////////////////////////////
130/// import setup from MethodRuleFit
131
133{
134 fRFIntParms.p = fMethodRuleFit->DataInfo().GetNVariables();
135 fRFIntParms.max_rules = fMethodRuleFit->GetRFNrules();
136 fRFIntParms.tree_size = fMethodRuleFit->GetRFNendnodes();
137 fRFIntParms.path_steps = fMethodRuleFit->GetGDNPathSteps();
138 //
139 fRFRealParms.path_inc = fMethodRuleFit->GetGDPathStep();
140 fRFRealParms.samp_fract = fMethodRuleFit->GetTreeEveFrac();
141 fRFRealParms.trim_qntl = fMethodRuleFit->GetLinQuantile();
142 fRFRealParms.conv_fac = fMethodRuleFit->GetGDErrScale();
143 //
144 if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyLinear() )
145 fRFIntParms.lmode = kRfLinear;
146 else if (fRuleFit->GetRuleEnsemblePtr()->DoOnlyRules() )
147 fRFIntParms.lmode = kRfRules;
148 else
149 fRFIntParms.lmode = kRfBoth;
150}
151
152////////////////////////////////////////////////////////////////////////////////
153/// set the directory containing rf_go.exe.
154
155void TMVA::RuleFitAPI::SetRFWorkDir(const char * wdir)
156{
157 fRFWorkDir = wdir;
158}
159
160////////////////////////////////////////////////////////////////////////////////
161/// check if the rulefit work dir is properly setup.
162/// it aborts (kFATAL) if not.
163///
164/// Check existence of directory
165
167{
168 TString oldDir = gSystem->pwd();
169 if (!gSystem->cd(fRFWorkDir)) {
170 fLogger << kWARNING << "Must create a rulefit directory named : " << fRFWorkDir << Endl;
171 HowtoSetupRF();
172 fLogger << kFATAL << "Setup failed - aborting!" << Endl;
173 }
174 // check rf_go.exe
175 FILE *f = fopen("rf_go.exe","r");
176 if (f==0) {
177 fLogger << kWARNING << "No rf_go.exe file in directory : " << fRFWorkDir << Endl;
178 HowtoSetupRF();
179 fLogger << kFATAL << "Setup failed - aborting!" << Endl;
180 }
181 fclose(f);
182 gSystem->cd(oldDir.Data());
183}
184
185////////////////////////////////////////////////////////////////////////////////
186/// set the training parameters
187
189{
190 ImportSetup();
191 //
192 Int_t n = fMethodRuleFit->Data()->GetNTrainingEvents();
193 // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
194 fRFIntParms.n = n; // number of data points in tree
195 fRFProgram = kRfTrain;
196}
197
198////////////////////////////////////////////////////////////////////////////////
199/// set the test params
200
202{
203 ImportSetup();
204 Int_t n = fMethodRuleFit->Data()->GetNTestEvents();
205 // Double_t neff = Double_t(n); // When weights are added: should be sum(wt)^2/sum(wt^2)
206 fRFIntParms.n = n; // number of data points in tree
207 fRFProgram = kRfPredict;
208}
209
210////////////////////////////////////////////////////////////////////////////////
211/// set default real params
212
214{
215 fRFRealParms.xmiss = 9.0e30;
216 fRFRealParms.trim_qntl = 0.025;
217 fRFRealParms.huber = 0.8;
218 fRFRealParms.inter_supp = 3.0;
219 fRFRealParms.memory_par = 0.01;
220 fRFRealParms.samp_fract = 0.5; // calculated later
221 fRFRealParms.path_inc = 0.01;
222 fRFRealParms.conv_fac = 1.1;
223}
224
225////////////////////////////////////////////////////////////////////////////////
226/// set default int params
227
229{
230 fRFIntParms.mode = (int)kRfClass;
231 fRFIntParms.lmode = (int)kRfBoth;
232 // fRFIntParms.n;
233 // fRFIntParms.p;
234 fRFIntParms.max_rules = 2000;
235 fRFIntParms.tree_size = 4;
236 fRFIntParms.path_speed = 2;
237 fRFIntParms.path_xval = 3;
238 fRFIntParms.path_steps = 50000;
239 fRFIntParms.path_testfreq = 100;
240 fRFIntParms.tree_store = 10000000;
241 fRFIntParms.cat_store = 1000000;
242
243}
244
245////////////////////////////////////////////////////////////////////////////////
246/// write all files read by rf_go.exe
247
249{
250 WriteIntParms();
251 WriteRealParms();
252 WriteLx();
253 WriteProgram();
254 WriteVarNames();
255 if (fRFProgram==kRfTrain) WriteTrain();
256 if (fRFProgram==kRfPredict) WriteTest();
257 if (fRFProgram==kRfVarimp) WriteRealVarImp();
258 return kTRUE;
259}
260
261////////////////////////////////////////////////////////////////////////////////
262/// write int params file
263
265{
266 std::ofstream f;
267 if (!OpenRFile("intparms",f)) return kFALSE;
268 WriteInt(f,&fRFIntParms.mode,sizeof(fRFIntParms)/sizeof(Int_t));
269 return kTRUE;
270}
271
272////////////////////////////////////////////////////////////////////////////////
273/// write int params file
274
276{
277 std::ofstream f;
278 if (!OpenRFile("realparms",f)) return kFALSE;
279 WriteFloat(f,&fRFRealParms.xmiss,sizeof(fRFRealParms)/sizeof(Float_t));
280 return kTRUE;
281}
282
283////////////////////////////////////////////////////////////////////////////////
284/// Save input variable mask
285///
286/// If the lx vector size is not the same as inputVars,
287/// resize it and fill it with 1
288/// NOTE: Always set all to 1
289/// if (fRFLx.size() != m_inputVars->size()) {
290
292{
293 fRFLx.clear();
294 fRFLx.resize(fMethodRuleFit->DataInfo().GetNVariables(),1);
295 // }
296 std::ofstream f;
297 if (!OpenRFile("lx",f)) return kFALSE;
298 WriteInt(f,&fRFLx[0],fRFLx.size());
299 return kTRUE;
300}
301
302////////////////////////////////////////////////////////////////////////////////
303/// write command to rf_go.exe
304
306{
307 std::ofstream f;
308 if (!OpenRFile("program",f)) return kFALSE;
309 TString program;
310 switch (fRFProgram) {
311 case kRfTrain:
312 program = "rulefit";
313 break;
314 case kRfPredict:
315 program = "rulefit_pred";
316 break;
317 // calculate variable importance
318 case kRfVarimp:
319 program = "varimp";
320 break;
321 default:
322 fRFProgram = kRfTrain;
323 program="rulefit";
324 break;
325 }
326 f << program;
327 return kTRUE;
328}
329
330////////////////////////////////////////////////////////////////////////////////
331/// write the minimum importance to be considered
332
334{
335 std::ofstream f;
336 if (!OpenRFile("realvarimp",f)) return kFALSE;
337 Float_t rvp[2];
338 rvp[0] = 0.0; // Mode: see varimp() in rulefit.r
339 rvp[1] = 0.0; // Minimum importance considered (1 is max)
340 WriteFloat(f,&rvp[0],2);
341 return kTRUE;
342}
343
344////////////////////////////////////////////////////////////////////////////////
345/// written by rf_go.exe; write rulefit output (rfout)
346
348{
349 fLogger << kWARNING << "WriteRfOut is not yet implemented" << Endl;
350 return kTRUE;
351}
352
353////////////////////////////////////////////////////////////////////////////////
354/// written by rf_go.exe; write rulefit status
355
357{
358 fLogger << kWARNING << "WriteRfStatus is not yet implemented" << Endl;
359 return kTRUE;
360}
361
362////////////////////////////////////////////////////////////////////////////////
363/// written by rf_go.exe (NOTE:Format unknown!)
364
366{
367 fLogger << kWARNING << "WriteRuleFitMod is not yet implemented" << Endl;
368 return kTRUE;
369}
370
371////////////////////////////////////////////////////////////////////////////////
372/// written by rf_go.exe (NOTE: format unknown!)
373
375{
376 fLogger << kWARNING << "WriteRuleFitSum is not yet implemented" << Endl;
377 return kTRUE;
378}
379
380////////////////////////////////////////////////////////////////////////////////
381/// write training data, column wise
382
384{
385 std::ofstream fx;
386 std::ofstream fy;
387 std::ofstream fw;
388 //
389 if (!OpenRFile("train.x",fx)) return kFALSE;
390 if (!OpenRFile("train.y",fy)) return kFALSE;
391 if (!OpenRFile("train.w",fw)) return kFALSE;
392 //
393 Float_t x,y,w;
394 //
395 // The loop order cannot be changed.
396 // The data is stored <var1(eve1), var1(eve2), ...var1(eveN), var2(eve1),....
397 //
398 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
399 for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNTrainingEvents(); ievt++) {
400 const Event * ev = fMethodRuleFit->GetTrainingEvent(ievt);
401 x = ev->GetValue(ivar);
402 WriteFloat(fx,&x,1);
403 if (ivar==0) {
404 w = ev->GetWeight();
405 y = fMethodRuleFit->DataInfo().IsSignal(ev)? 1.0 : -1.0;
406 WriteFloat(fy,&y,1);
407 WriteFloat(fw,&w,1);
408 }
409 }
410 }
411 fLogger << kINFO << "Number of training data written: " << fMethodRuleFit->Data()->GetNTrainingEvents() << Endl;
412 return kTRUE;
413}
414
415////////////////////////////////////////////////////////////////////////////////
416/// Write test data
417
419{
420 fMethodRuleFit->Data()->SetCurrentType(Types::kTesting);
421
422 std::ofstream f;
423 //
424 if (!OpenRFile("test.x",f)) return kFALSE;
425 //
426 Float_t vf;
427 Float_t neve;
428 //
429 neve = static_cast<Float_t>(fMethodRuleFit->Data()->GetNEvents());
430 WriteFloat(f,&neve,1);
431 // Test data is saved as:
432 // 0 : <N> num of events, type float, 4 bytes
433 // 1-N : First variable for all events
434 // N+1-2N : Second variable...
435 // ...
436 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
437 for (Int_t ievt=0;ievt<fMethodRuleFit->Data()->GetNEvents(); ievt++) {
438 vf = fMethodRuleFit->GetEvent(ievt)->GetValue(ivar);
439 WriteFloat(f,&vf,1);
440 }
441 }
442 fLogger << kINFO << "Number of test data written: " << fMethodRuleFit->Data()->GetNEvents() << Endl;
443 //
444 return kTRUE;
445}
446
447////////////////////////////////////////////////////////////////////////////////
448/// write variable names, ascii
449
451{
452 std::ofstream f;
453 if (!OpenRFile("varnames",f)) return kFALSE;
454 for (UInt_t ivar=0; ivar<fMethodRuleFit->DataInfo().GetNVariables(); ivar++) {
455 f << fMethodRuleFit->DataInfo().GetVariableInfo(ivar).GetExpression() << '\n';
456 }
457 return kTRUE;
458}
459
460////////////////////////////////////////////////////////////////////////////////
461
463
464{
465 // written by rf_go.exe
466 fLogger << kWARNING << "WriteVarImp is not yet implemented" << Endl;
467 return kTRUE;
468}
469
470////////////////////////////////////////////////////////////////////////////////
471/// written by rf_go.exe
472
474{
475 fLogger << kWARNING << "WriteYhat is not yet implemented" << Endl;
476 return kTRUE;
477}
478
479////////////////////////////////////////////////////////////////////////////////
480/// read the score
481
483{
484 fRFYhat.clear();
485 //
486 std::ifstream f;
487 if (!OpenRFile("yhat",f)) return kFALSE;
488 Int_t neve;
489 Float_t xval;
490 ReadFloat(f,&xval,1);
491 neve = static_cast<Int_t>(xval);
492 if (neve!=fMethodRuleFit->Data()->GetNTestEvents()) {
493 fLogger << kWARNING << "Inconsistent size of yhat file and test tree!" << Endl;
494 fLogger << kWARNING << "neve = " << neve << " , tree = " << fMethodRuleFit->Data()->GetNTestEvents() << Endl;
495 return kFALSE;
496 }
497 for (Int_t ievt=0; ievt<fMethodRuleFit->Data()->GetNTestEvents(); ievt++) {
498 ReadFloat(f,&xval,1);
499 fRFYhat.push_back(xval);
500 }
501 return kTRUE;
502}
503
504////////////////////////////////////////////////////////////////////////////////
505/// read variable importance
506
508{
509 fRFVarImp.clear();
510 //
511 std::ifstream f;
512 if (!OpenRFile("varimp",f)) return kFALSE;
513 UInt_t nvars;
514 Float_t xval;
515 Float_t xmax=1.0;
516 nvars=fMethodRuleFit->DataInfo().GetNVariables();
517 //
518 // First read all importances
519 //
520 for (UInt_t ivar=0; ivar<nvars; ivar++) {
521 ReadFloat(f,&xval,1);
522 if (ivar==0) {
523 xmax=xval;
524 } else {
525 if (xval>xmax) xmax=xval;
526 }
527 fRFVarImp.push_back(xval);
528 }
529 //
530 // Read the indices.
531 // They are saved as float (!) by rf_go.exe.
532 //
533 for (UInt_t ivar=0; ivar<nvars; ivar++) {
534 fRFVarImp[ivar] = fRFVarImp[ivar]/xmax;
535 ReadFloat(f,&xval,1);
536 fRFVarImpInd.push_back(Int_t(xval)-1);
537 }
538 return kTRUE;
539}
540
541////////////////////////////////////////////////////////////////////////////////
542/// read model from rulefit.sum
543
545{
546 fRFVarImp.clear();
547 //
548 fLogger << kVERBOSE << "Reading RuleFit summary file" << Endl;
549 std::ifstream f;
550 if (!OpenRFile("rulefit.sum",f)) return kFALSE;
551 Int_t lines=0;
552 Int_t nrules=0;
553 Int_t nvars=0;
554 Int_t nvarsOpt=0;
555 Int_t dumI;
556 Float_t dumF;
557 Float_t offset;
558 Double_t impref=-1.0;
559 Double_t imp;
560
561 fRuleFit->GetRuleEnsemblePtr()->SetAverageRuleSigma(0.4); // value used by Friedmans RuleFit
562 //
563 //--------------------------------------------
564 // first read rulefit.sum header
565 //--------------------------------------------
566 // line type val descr
567 // 0 <int> 86 N(rules)x2
568 // 1 <int> 155 ???
569 // 2 <int> 1 ???
570 // 3 <int> 1916 ???
571 // 4 <int> 2 N(vars) ?
572 // 5 <int> 2 N(vars) ?
573 // 6 <float> 9e+30 xmiss
574 // 7 <float> 1.1e-1 a0 (model offset)
575 //--------------------------------------------
576 //
577 // NOTE: a model without any rules, will look like
578 // for the first four lines:
579 //
580 // 0 1
581 // 1 1
582 // 2 1
583 // 3 0
584 //
585 // There will later be one block of dummy data for one rule.
586 // In order to catch this situation, some special checks are made below.
587 //
588 Bool_t norules;
589 lines += ReadInt(f,&nrules);
590 norules = (nrules==1);
591 lines += ReadInt(f,&dumI);
592 norules = norules && (dumI==1);
593 lines += ReadInt(f,&dumI);
594 norules = norules && (dumI==1);
595 lines += ReadInt(f,&dumI);
596 norules = norules && (dumI==0);
597 if (nrules==0) norules=kTRUE; // this ugly construction is needed:(
598 if (norules) nrules = 0;
599 //
600 lines += ReadInt(f,&nvars);
601 lines += ReadInt(f,&nvarsOpt);
602 lines += ReadFloat(f,&dumF);
603 lines += ReadFloat(f,&offset);
604 fLogger << kDEBUG << "N(rules) = " << nrules << Endl;
605 fLogger << kDEBUG << "N(vars) = " << nvars << Endl;
606 fLogger << kDEBUG << "N(varsO) = " << nvarsOpt << Endl;
607 fLogger << kDEBUG << "xmiss = " << dumF << Endl;
608 fLogger << kDEBUG << "offset = " << offset << Endl;
609 if (nvars!=nvarsOpt) {
610 fLogger << kWARNING << "Format of rulefit.sum is ... weird?? Continuing but who knows how it will end...?" << Endl;
611 }
612 std::vector<Double_t> rfSupp;
613 std::vector<Double_t> rfCoef;
614 std::vector<Int_t> rfNcut;
615 std::vector<Rule *> rfRules;
616 if (norules) {
617 // if no rules, read 8 blocks of data
618 // this corresponds to one dummy rule
619 for (Int_t t=0; t<8; t++) {
620 lines += ReadFloat(f,&dumF);
621 }
622 }
623 //
624 //--------------------------------------------
625 // read first part of rule info
626 //--------------------------------------------
627 //
628 // 8 <int> 10 ???
629 // 9 <float> 0.185 support
630 // 10 <float> 0.051 coefficient
631 // 11 <float> 2 num of cuts in rule
632 // 12 <float> 1 ??? not used by this interface
633 //
634 for (Int_t r=0; r<nrules; r++) {
635 lines += ReadFloat(f,&dumF);
636 lines += ReadFloat(f,&dumF);
637 rfSupp.push_back(dumF);
638 lines += ReadFloat(f,&dumF);
639 rfCoef.push_back(dumF);
640 lines += ReadFloat(f,&dumF);
641 rfNcut.push_back(static_cast<int>(dumF+0.5));
642 lines += ReadFloat(f,&dumF);
643 //
644 }
645 //--------------------------------------------
646 // read second part of rule info
647 //--------------------------------------------
648 //
649 // Per range (cut):
650 // 0 <float> 1 varind
651 // 1 <float> -1.0 low
652 // 2 <float> 1.56 high
653 //
654
655 for (Int_t r=0; r<nrules; r++) {
656 Int_t varind;
659 Rule *rule = new Rule(fRuleFit->GetRuleEnsemblePtr());
660 rfRules.push_back( rule );
661 RuleCut *rfcut = new RuleCut();
662 rfcut->SetNvars(rfNcut[r]);
663 rule->SetRuleCut( rfcut );
664 // the below are set to default values since no info is
665 // available in rulefit.sum
666 rule->SetNorm(1.0);
667 rule->SetSupport(0);
668 rule->SetSSB(0.0);
669 rule->SetSSBNeve(0.0);
670 rule->SetImportanceRef(1.0);
671 rule->SetSSB(0.0);
672 rule->SetSSBNeve(0.0);
673 // set support etc
674 rule->SetSupport(rfSupp[r]);
675 rule->SetCoefficient(rfCoef[r]);
676 rule->CalcImportance();
677 imp = rule->GetImportance();
678 if (imp>impref) impref = imp; // find max importance
679 //
680 fLogger << kDEBUG << "Rule #" << r << " : " << nvars << Endl;
681 fLogger << kDEBUG << " support = " << rfSupp[r] << Endl;
682 fLogger << kDEBUG << " sigma = " << rule->GetSigma() << Endl;
683 fLogger << kDEBUG << " coeff = " << rfCoef[r] << Endl;
684 fLogger << kDEBUG << " N(cut) = " << rfNcut[r] << Endl;
685
686 for (Int_t c=0; c<rfNcut[r]; c++) {
687 lines += ReadFloat(f,&dumF);
688 varind = static_cast<Int_t>(dumF+0.5)-1;
689 lines += ReadFloat(f,&dumF);
690 xmin = static_cast<Double_t>(dumF);
691 lines += ReadFloat(f,&dumF);
692 xmax = static_cast<Double_t>(dumF);
693 // create Rule HERE!
694 rfcut->SetSelector(c,varind);
695 rfcut->SetCutMin(c,xmin);
696 rfcut->SetCutMax(c,xmax);
697 // the following is not nice - this is however defined
698 // by the rulefit.sum format.
699 rfcut->SetCutDoMin(c,(xmin<-8.99e35 ? kFALSE:kTRUE));
700 rfcut->SetCutDoMax(c,(xmax> 8.99e35 ? kFALSE:kTRUE));
701 //
702 }
703 }
704 fRuleFit->GetRuleEnsemblePtr()->SetRules( rfRules );
705 fRuleFit->GetRuleEnsemblePtr()->SetOffset( offset );
706 //--------------------------------------------
707 // read second part of rule info
708 //--------------------------------------------
709 //
710 // Per linear term:
711 // 73 1 var index
712 // 74 -1.99594 min
713 // 75 1.99403 max
714 // 76 -0.000741858 ??? average ???
715 // 77 0.970935 std
716 // 78 0 coeff
717 //
718 std::vector<Int_t> varind;
719 std::vector<Double_t> xmin;
720 std::vector<Double_t> xmax;
721 std::vector<Double_t> average;
722 std::vector<Double_t> stdev;
723 std::vector<Double_t> norm;
724 std::vector<Double_t> coeff;
725 //
726 for (Int_t c=0; c<nvars; c++) {
727 lines += ReadFloat(f,&dumF);
728 varind.push_back(static_cast<Int_t>(dumF+0.5)-1);
729 lines += ReadFloat(f,&dumF);
730 xmin.push_back(static_cast<Double_t>(dumF));
731 lines += ReadFloat(f,&dumF);
732 xmax.push_back(static_cast<Double_t>(dumF));
733 lines += ReadFloat(f,&dumF);
734 average.push_back(static_cast<Double_t>(dumF));
735 lines += ReadFloat(f,&dumF);
736 stdev.push_back(static_cast<Double_t>(dumF));
737 Double_t nv = fRuleFit->GetRuleEnsemblePtr()->CalcLinNorm(stdev.back());
738 norm.push_back(nv);
739 lines += ReadFloat(f,&dumF);
740 coeff.push_back(dumF/nv); // save coefficient for normalised var
741 //
742 fLogger << kDEBUG << "Linear #" << c << Endl;
743 fLogger << kDEBUG << " varind = " << varind.back() << Endl;
744 fLogger << kDEBUG << " xmin = " << xmin.back() << Endl;
745 fLogger << kDEBUG << " xmax = " << xmax.back() << Endl;
746 fLogger << kDEBUG << " average = " << average.back() << Endl;
747 fLogger << kDEBUG << " stdev = " << stdev.back() << Endl;
748 fLogger << kDEBUG << " coeff = " << coeff.back() << Endl;
749 }
750 if (xmin.size()>0) {
751 fRuleFit->GetRuleEnsemblePtr()->SetLinCoefficients(coeff);
752 fRuleFit->GetRuleEnsemblePtr()->SetLinDM(xmin);
753 fRuleFit->GetRuleEnsemblePtr()->SetLinDP(xmax);
754 fRuleFit->GetRuleEnsemblePtr()->SetLinNorm(norm);
755 }
756 // fRuleFit->GetRuleEnsemblePtr()->CalcImportance();
757 imp = fRuleFit->GetRuleEnsemblePtr()->CalcLinImportance();
758 if (imp>impref) impref=imp;
759 fRuleFit->GetRuleEnsemblePtr()->SetImportanceRef(impref);
760 fRuleFit->GetRuleEnsemblePtr()->CleanupLinear(); // to fill fLinTermOK vector
761
762 fRuleFit->GetRuleEnsemblePtr()->CalcVarImportance();
763 // fRuleFit->GetRuleEnsemblePtr()->CalcRuleSupport();
764
765 fLogger << kDEBUG << "Reading model done" << Endl;
766 return kTRUE;
767}
768
769////////////////////////////////////////////////////////////////////////////////
770/// execute rf_go.exe
771
773{
774 TString oldDir = gSystem->pwd();
775 TString cmd = "./rf_go.exe";
776 gSystem->cd(fRFWorkDir.Data());
777 int rval = gSystem->Exec(cmd.Data());
778 gSystem->cd(oldDir.Data());
779 return rval;
780}
ROOT::R::TRInterface & r
Definition Object.C:4
#define f(i)
Definition RSha256.hxx:104
#define c(i)
Definition RSha256.hxx:101
int Int_t
Definition RtypesCore.h:45
const Bool_t kFALSE
Definition RtypesCore.h:92
double Double_t
Definition RtypesCore.h:59
float Float_t
Definition RtypesCore.h:57
const Bool_t kTRUE
Definition RtypesCore.h:91
#define ClassImp(name)
Definition Rtypes.h:364
float xmin
float xmax
R__EXTERN TSystem * gSystem
Definition TSystem.h:559
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition Event.cxx:236
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition Event.cxx:381
J Friedman's RuleFit method.
const TString GetRFWorkDir() const
A class describing a 'rule cut'.
Definition RuleCut.h:36
void SetSelector(Int_t i, UInt_t s)
Definition RuleCut.h:65
void SetCutDoMin(Int_t i, Bool_t v)
Definition RuleCut.h:68
void SetCutMin(Int_t i, Double_t v)
Definition RuleCut.h:66
void SetCutMax(Int_t i, Double_t v)
Definition RuleCut.h:67
void SetNvars(UInt_t nc)
Definition RuleCut.h:143
void SetCutDoMax(Int_t i, Bool_t v)
Definition RuleCut.h:69
J Friedman's RuleFit method.
Definition RuleFitAPI.h:51
void SetTestParms()
set the test params
Bool_t WriteRuleFitSum()
written by rf_go.exe (NOTE: format unknown!)
Bool_t WriteYhat()
written by rf_go.exe
Bool_t WriteAll()
write all files read by rf_go.exe
void ImportSetup()
import setup from MethodRuleFit
Bool_t WriteRfStatus()
written by rf_go.exe; write rulefit status
Bool_t WriteIntParms()
write int params file
void CheckRFWorkDir()
check if the rulefit work dir is properly setup.
Bool_t WriteProgram()
write command to rf_go.exe
Bool_t ReadModelSum()
read model from rulefit.sum
void SetRFWorkDir(const char *wdir)
set the directory containing rf_go.exe.
Bool_t ReadVarImp()
read variable importance
Bool_t WriteRuleFitMod()
written by rf_go.exe (NOTE:Format unknown!)
Bool_t WriteRfOut()
written by rf_go.exe; write rulefit output (rfout)
void InitRuleFit()
default initialisation SetRFWorkDir("./rulefit");
void FillRealParmsDef()
set default real params
Bool_t WriteVarNames()
write variable names, ascii
Bool_t WriteRealVarImp()
write the minimum importance to be considered
void FillIntParmsDef()
set default int params
void WelcomeMessage()
welcome message
Bool_t WriteTrain()
write training data, column wise
virtual ~RuleFitAPI()
destructor
Bool_t WriteRealParms()
write int params file
Bool_t WriteLx()
Save input variable mask.
Bool_t ReadYhat()
read the score
void HowtoSetupRF()
howto message
Bool_t WriteTest()
Write test data.
void SetTrainParms()
set the training parameters
Int_t RunRuleFit()
execute rf_go.exe
A class implementing various fits of rule ensembles.
Definition RuleFit.h:46
Implementation of a rule.
Definition Rule.h:50
void SetImportanceRef(Double_t v)
Definition Rule.h:96
void SetCoefficient(Double_t v)
Definition Rule.h:84
void SetNorm(Double_t norm)
Definition Rule.h:81
Double_t GetImportance() const
Definition Rule.h:145
Double_t GetSigma() const
Definition Rule.h:143
void SetSSBNeve(Double_t v)
Definition Rule.h:93
void SetRuleCut(RuleCut *rc)
Definition Rule.h:78
void CalcImportance()
Definition Rule.h:99
void SetSupport(Double_t v)
Definition Rule.h:87
void SetSSB(Double_t v)
Definition Rule.h:90
Basic string class.
Definition TString.h:136
const char * Data() const
Definition TString.h:369
Bool_t cd(const char *path)
Definition TSystem.h:421
const char * pwd()
Definition TSystem.h:422
virtual Int_t Exec(const char *shellcmd)
Execute a command.
Definition TSystem.cxx:654
Double_t y[n]
Definition legend1.C:17
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:158