Logo ROOT  
Reference Guide
RuleEnsemble.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleEnsemble *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * A class generating an ensemble of rules *
12 * Input: a forest of decision trees *
13 * Output: an ensemble of rules *
14 * *
15 * Authors (alphabetical): *
16 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
18 * *
19 * Copyright (c) 2005: *
20 * CERN, Switzerland *
21 * Iowa State U. *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef ROOT_TMVA_RuleEnsemble
30#define ROOT_TMVA_RuleEnsemble
31
32#include "TMath.h"
33
34#include "TMVA/DecisionTree.h"
35#include "TMVA/Event.h"
36#include "TMVA/Rule.h"
37#include "TMVA/Types.h"
38
39class TH1F;
40
41namespace TMVA {
42
43 class MethodBase;
44 class RuleFit;
45 class MethodRuleFit;
46 class RuleEnsemble;
47 class MsgLogger;
48
49 std::ostream& operator<<( std::ostream& os, const RuleEnsemble& event );
50
52
53 // output operator for a RuleEnsemble
54 friend std::ostream& operator<< ( std::ostream& os, const RuleEnsemble& rules );
55
56 public:
57
59
60 // main constructor
61 RuleEnsemble( RuleFit* rf );
62
63 // copy constructor
64 RuleEnsemble( const RuleEnsemble& other );
65
66 // empty constructor
68
69 // destructor
70 virtual ~RuleEnsemble();
71
72 // initialize
73 void Initialize( const RuleFit* rf );
74
75 // set message type
76 void SetMsgType( EMsgType t );
77
78 // makes the model - calls MakeRules() and MakeLinearTerms()
79 void MakeModel();
80
81 // generates the rules from a given forest of decision trees
82 void MakeRules( const std::vector< const TMVA::DecisionTree *>& forest );
83
84 // make the linear terms
85 void MakeLinearTerms();
86
87 // select linear model
89
90 // select rule model
92
93 // select full (linear+rules) model
95
96 // set rule collection (if not created by MakeRules())
97 void SetRules( const std::vector< TMVA::Rule *> & rules );
98
99 // set RuleFit ptr
100 void SetRuleFit( const RuleFit *rf ) { fRuleFit = rf; }
101
102 // set coefficients
103 void SetCoefficients( const std::vector< Double_t >& v );
104 void SetCoefficient( UInt_t i, Double_t v ) { if (i<fRules.size()) fRules[i]->SetCoefficient(v); }
105 //
106 void SetOffset(Double_t v=0.0) { fOffset=v; }
108 void SetLinCoefficients( const std::vector< Double_t >& v ) { fLinCoefficients = v; }
110 void SetLinDM( const std::vector<Double_t> & xmin ) { fLinDM = xmin; }
111 void SetLinDP( const std::vector<Double_t> & xmax ) { fLinDP = xmax; }
112 void SetLinNorm( const std::vector<Double_t> & norm ) { fLinNorm = norm; }
113
114 Double_t CalcLinNorm( Double_t stdev ) { return ( stdev>0 ? fAverageRuleSigma/stdev : 1.0 ); }
115
116 // clear coefficients
117 void ClearCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fRules.size(); i++) fRules[i]->SetCoefficient(val); }
118 void ClearLinCoefficients( Double_t val=0 ) { for (UInt_t i=0; i<fLinCoefficients.size(); i++) fLinCoefficients[i]=val; }
119 void ClearLinNorm( Double_t val=1.0 ) { for (UInt_t i=0; i<fLinNorm.size(); i++) fLinNorm[i]=val; }
120
121 // set maximum allowed distance between equal rules
123
124 // set minimum rule importance - used by CleanupRules()
125 void SetImportanceCut(Double_t minimp=0) { fImportanceCut=minimp; }
126
127 // set the quantile for linear terms
129
130 // set average sigma for rules
131 void SetAverageRuleSigma(Double_t v) { if (v>0.5) v=0.5; fAverageRuleSigma = v; fAverageSupport = 0.5*(1.0+TMath::Sqrt(1.0-4.0*v*v)); }
132
133 // Calculate the number of possible rules from a given tree
134 Int_t CalcNRules( const TMVA::DecisionTree* dtree );
135 // Recursively search for end-nodes; used by CalcNRules()
136 void FindNEndNodes( const TMVA::Node* node, Int_t& nendnodes );
137
138 // set current event to be used
139 void SetEvent( const Event & e ) { fEvent = &e; fEventCacheOK = kFALSE; }
140
141 // fill cached values of rule/linear response
142 void UpdateEventVal();
143
144 // fill binary rule response for all events (or selected subset)
145 void MakeRuleMap(const std::vector<const TMVA::Event *> *events=0, UInt_t ifirst=0, UInt_t ilast=0);
146
147 // clear rule map
148 void ClearRuleMap() { fRuleMap.clear(); fRuleMapEvents=0; }
149
150 // evaluates the event using the ensemble of rules
151 // the following uses fEventCache, that is per event saved in cache
152 Double_t EvalEvent() const;
153 Double_t EvalEvent( const Event & e );
154
155 // same as previous but using other model coefficients
157 const std::vector<Double_t> & coefs,
158 const std::vector<Double_t> & lincoefs) const;
159 Double_t EvalEvent( const Event & e,
160 Double_t ofs,
161 const std::vector<Double_t> & coefs,
162 const std::vector<Double_t> & lincoefs);
163
164 // same as above but using the event index
165 // these will use fRuleMap - MUST call MakeRuleMap() before - no check...
166 Double_t EvalEvent( UInt_t evtidx ) const;
167 Double_t EvalEvent( UInt_t evtidx,
168 Double_t ofs,
169 const std::vector<Double_t> & coefs,
170 const std::vector<Double_t> & lincoefs) const;
171
172 // evaluate the linear term using event by reference
173 // Double_t EvalLinEvent( UInt_t vind ) const;
174 Double_t EvalLinEvent() const;
175 Double_t EvalLinEvent( const std::vector<Double_t> & coefs ) const;
176 Double_t EvalLinEvent( const Event &e );
177 Double_t EvalLinEvent( const Event &e, UInt_t vind );
178 Double_t EvalLinEvent( const Event &e, const std::vector<Double_t> & coefs );
179
180 // idem but using evtidx - must call MakeRuleMap() first
181 Double_t EvalLinEvent( UInt_t evtidx ) const;
182 Double_t EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const;
183 Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind ) const;
184 Double_t EvalLinEvent( UInt_t evtidx, UInt_t vind, Double_t coefs ) const;
185
186 // evaluate linear terms used to fill fEventLinearVal
187 Double_t EvalLinEventRaw( UInt_t vind, const Event &e, Bool_t norm ) const;
188 Double_t EvalLinEventRaw( UInt_t vind, UInt_t evtidx, Bool_t norm ) const;
189
190 // calculate p(y=1|x) for a given event using the linear terms
191 Double_t PdfLinear( Double_t & nsig, Double_t & ntot ) const;
192
193 // calculate p(y=1|x) for a given event using the rules
194 Double_t PdfRule( Double_t & nsig, Double_t & ntot ) const;
195
196 // calculate F* = 2*p(y=1|x) - 1
197 Double_t FStar() const;
198 Double_t FStar(const TMVA::Event & e );
199
200 // set reference importance for all model objects
201 void SetImportanceRef(Double_t impref);
202
203 // calculates the support for all rules given the set of events
204 void CalcRuleSupport();
205
206 // calculates rule importance
207 void CalcImportance();
208
209 // calculates rule importance
211
212 // calculates linear importance
214
215 // calculates variable importance
216 void CalcVarImportance();
217
218 // remove rules of low importance
219 void CleanupRules();
220
221 // remove linear terms of low importance
222 void CleanupLinear();
223
224 // remove similar rules
225 void RemoveSimilarRules();
226
227 // get rule statistics
228 void RuleStatistics();
229
230 // get rule response stats
231 void RuleResponseStats();
232
233 // copy-assignment operator (delegates to Copy())
234 void operator=( const RuleEnsemble& other ) { Copy( other ); }
235
236 // calculate sum of the squared coefficients
238
239 // fill the vector with the coefficients
240 void GetCoefficients( std::vector< Double_t >& v );
241
242 // accessors
243 const MethodRuleFit* GetMethodRuleFit() const;
244 const MethodBase* GetMethodBase() const;
245 const RuleFit* GetRuleFit() const { return fRuleFit; }
246 //
247 const std::vector<const TMVA::Event *>* GetTrainingEvents() const;
248 const Event* GetTrainingEvent(UInt_t i) const;
249 const Event* GetEvent() const { return fEvent; }
250 //
255 Bool_t DoFull() const { return (fLearningModel==kFull); }
259 Double_t GetOffset() const { return fOffset; }
260 UInt_t GetNRules() const { return (DoRules() ? fRules.size():0); }
261 const std::vector<TMVA::Rule*>& GetRulesConst() const { return fRules; }
262 std::vector<TMVA::Rule*>& GetRules() { return fRules; }
263 const std::vector< Double_t >& GetLinCoefficients() const { return fLinCoefficients; }
264 const std::vector< Double_t >& GetLinNorm() const { return fLinNorm; }
265 const std::vector< Double_t >& GetLinImportance() const { return fLinImportance; }
266 const std::vector< Double_t >& GetVarImportance() const { return fVarImportance; }
267 UInt_t GetNLinear() const { return (DoLinear() ? fLinNorm.size():0); }
269
270 const Rule *GetRulesConst(int i) const { return fRules[i]; }
271 Rule *GetRules(int i) { return fRules[i]; }
272
273 UInt_t GetRulesNCuts(int i) const { return fRules[i]->GetRuleCut()->GetNcuts(); }
275 Double_t GetLinCoefficients(int i) const { return fLinCoefficients[i]; }
276 Double_t GetLinNorm(int i) const { return fLinNorm[i]; }
277 Double_t GetLinDM(int i) const { return fLinDM[i]; }
278 Double_t GetLinDP(int i) const { return fLinDP[i]; }
279 Double_t GetLinImportance(int i) const { return fLinImportance[i]; }
280 Double_t GetVarImportance(int i) const { return fVarImportance[i]; }
281 Double_t GetRulePTag(int i) const { return fRulePTag[i]; }
282 Double_t GetRulePSS(int i) const { return fRulePSS[i]; }
283 Double_t GetRulePSB(int i) const { return fRulePSB[i]; }
284 Double_t GetRulePBS(int i) const { return fRulePBS[i]; }
285 Double_t GetRulePBB(int i) const { return fRulePBB[i]; }
286
287 Bool_t IsLinTermOK(int i) const { return fLinTermOK[i]; }
288 //
291 Double_t GetEventRuleVal(UInt_t i) const { return (fEventRuleVal[i] ? 1.0:0.0); }
294 //
295 const std::vector<UInt_t> & GetEventRuleMap(UInt_t evtidx) const { return fRuleMap[evtidx]; }
296 const TMVA::Event *GetRuleMapEvent(UInt_t evtidx) const { return (*fRuleMapEvents)[evtidx]; }
297 Bool_t IsRuleMapOK() const { return fRuleMapOK; }
298
299 // print rule generation info
300 void PrintRuleGen() const;
301
302 // print the ensemble
303 void Print() const;
304
305 // print the model in a cryptic way
306 void PrintRaw ( std::ostream& os ) const; // obsolete
307 void* AddXMLTo ( void* parent ) const;
308
309 // read the model from input stream
310 void ReadRaw ( std::istream& istr ); // obsolete
311 void ReadFromXML( void* wghtnode );
312
313
314 private:
315
316 // delete all rules
317 void DeleteRules() { for (UInt_t i=0; i<fRules.size(); i++) delete fRules[i]; fRules.clear(); }
318
319 // copy method
320 void Copy( RuleEnsemble const& other );
321
322 // set all coeffs to default values
323 void ResetCoefficients();
324
325 // make rules from one decision tree
326 void MakeRulesFromTree( const DecisionTree *dtree );
327
328 // add a rule with the given end-node
329 void AddRule( const Node *node );
330
331 // make a rule
332 Rule *MakeTheRule( const Node *node );
333
334
335 ELearningModel fLearningModel; // can be full (rules+linear), rules, linear
336 Double_t fImportanceCut; // minimum importance accepted
337 Double_t fLinQuantile; // quantile cut to remove outliers
338 Double_t fOffset; // offset in discriminator function
339 std::vector< TMVA::Rule* > fRules; // vector of rules
340 std::vector< Char_t > fLinTermOK; // flags linear terms with sufficient strong importance <-- stores boolean
341 std::vector< Double_t > fLinDP; // delta+ in eq 24, ref 2
342 std::vector< Double_t > fLinDM; // delta-
343 std::vector< Double_t > fLinCoefficients; // linear coefficients, one per variable
344 std::vector< Double_t > fLinNorm; // norm of ditto, see after eq 26 in ref 2
345 std::vector< TH1F* > fLinPDFB; // pdfs for each variable, background
346 std::vector< TH1F* > fLinPDFS; // pdfs for each variable, signal
347 std::vector< Double_t > fLinImportance; // linear term importance
348 std::vector< Double_t > fVarImportance; // one importance per input variable
349 Double_t fImportanceRef; // reference importance (max)
350 Double_t fAverageSupport; // average support (over all rules)
351 Double_t fAverageRuleSigma; // average rule sigma
352 //
353 std::vector< Double_t > fRuleVarFrac; // fraction of rules using a given variable - size of vector = n(variables)
354 std::vector< Double_t > fRulePSS; // p(tag as S|S) - tagged as S if rule is SIG and the event is accepted
355 std::vector< Double_t > fRulePSB; // p(tag as S|B)
356 std::vector< Double_t > fRulePBS; // p(tag as B|S)
357 std::vector< Double_t > fRulePBB; // p(tag as B|B)
358 std::vector< Double_t > fRulePTag; // p(tag)
359 Double_t fRuleFSig; // N(sig)/N(sig)+N(bkg)
360 Double_t fRuleNCave; // N(cuts) average
361 Double_t fRuleNCsig; // idem sigma
362 //
363 Double_t fRuleMinDist; // minimum rule distance
364 UInt_t fNRulesGenerated; // number of rules generated, before cleanup
365 //
366 const Event* fEvent; // current event.
367 Bool_t fEventCacheOK; // true if rule/linear respons are updated
368 std::vector<Char_t> fEventRuleVal; // the rule respons of current event <----- stores boolean
369 std::vector<Double_t> fEventLinearVal; // linear respons
370 //
371 Bool_t fRuleMapOK; // true if MakeRuleMap() has been called
372 std::vector< std::vector<UInt_t> > fRuleMap; // map of rule responses
373 UInt_t fRuleMapInd0; // start index
374 UInt_t fRuleMapInd1; // last index
375 const std::vector<const TMVA::Event *> *fRuleMapEvents; // pointer to vector of events used
376 //
377 const RuleFit* fRuleFit; // pointer to rule fit object
378
379 mutable MsgLogger* fLogger; //! message logger
380 MsgLogger& Log() const { return *fLogger; }
381 };
382}
383
384//_______________________________________________________________________
386{
387 //
388 // Update rule and linear response using the current event
389 //
390 if (fEventCacheOK) return;
391 //
392 if (DoRules()) {
393 UInt_t nrules = fRules.size();
394 fEventRuleVal.resize(nrules,kFALSE);
395 for (UInt_t r=0; r<nrules; r++) {
396 fEventRuleVal[r] = fRules[r]->EvalEvent(*fEvent);
397 }
398 }
399 if (DoLinear()) {
400 UInt_t nlin = fLinTermOK.size();
401 fEventLinearVal.resize(nlin,0);
402 for (UInt_t r=0; r<nlin; r++) {
403 fEventLinearVal[r] = EvalLinEventRaw(r,*fEvent,kFALSE); // not normalised!
404 }
405 }
407}
408
409//_____________________________________________________________________
411{
412 // evaluate current event
413
414 Int_t nrules = fRules.size();
415 Double_t rval=fOffset;
416 Double_t linear=0;
417 //
418 // evaluate all rules
419 // normally it should NOT use the normalized rules - the flag should be kFALSE
420 //
421 if (DoRules()) {
422 for ( Int_t i=0; i<nrules; i++ ) {
423 if (fEventRuleVal[i])
424 rval += fRules[i]->GetCoefficient();
425 }
426 }
427 //
428 // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
429 //
430 if (DoLinear()) linear = EvalLinEvent();
431 rval +=linear;
432
433 return rval;
434}
435
436//_____________________________________________________________________
438 const std::vector<Double_t> & coefs,
439 const std::vector<Double_t> & lincoefs ) const
440{
441 // evaluate current event with given offset and coefs
442
443 Int_t nrules = fRules.size();
444 Double_t rval = ofs;
445 Double_t linear = 0;
446 //
447 // evaluate all rules
448 //
449 if (DoRules()) {
450 for ( Int_t i=0; i<nrules; i++ ) {
451 if (fEventRuleVal[i])
452 rval += coefs[i];
453 }
454 }
455 //
456 // Include linear part - the call below incorporates both coefficient and normalisation (fLinNorm)
457 //
458 if (DoLinear()) linear = EvalLinEvent(lincoefs);
459 rval +=linear;
460
461 return rval;
462}
463
464//_____________________________________________________________________
466{
467 // evaluate event e
468 SetEvent(e);
469 UpdateEventVal();
470 return EvalEvent();
471}
472
473//_____________________________________________________________________
475 Double_t ofs,
476 const std::vector<Double_t> & coefs,
477 const std::vector<Double_t> & lincoefs )
478{
479 // evaluate event e
480 SetEvent(e);
481 UpdateEventVal();
482 return EvalEvent(ofs,coefs,lincoefs);
483}
484
485//_____________________________________________________________________
487{
488 // evaluate event with index evtidx
489 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
490 //
491 Double_t rval=fOffset;
492 if (DoRules()) {
493 UInt_t nrules = fRuleMap[evtidx].size();
494 UInt_t rind;
495 for (UInt_t ir = 0; ir<nrules; ir++) {
496 rind = fRuleMap[evtidx][ir];
497 rval += fRules[rind]->GetCoefficient();
498 }
499 }
500 if (DoLinear()) {
501 UInt_t nlin = fLinTermOK.size();
502 for (UInt_t r=0; r<nlin; r++) {
503 if (fLinTermOK[r]) {
504 rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
505 }
506 }
507 }
508 return rval;
509}
510
511//_____________________________________________________________________
513 Double_t ofs,
514 const std::vector<Double_t> & coefs,
515 const std::vector<Double_t> & lincoefs ) const
516{
517 // evaluate event with index evtidx and user given model coefficients
518 //
519 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
520 Double_t rval=ofs;
521 if (DoRules()) {
522 UInt_t nrules = fRuleMap[evtidx].size();
523 UInt_t rind;
524 for (UInt_t ir = 0; ir<nrules; ir++) {
525 rind = fRuleMap[evtidx][ir];
526 rval += coefs[rind];
527 }
528 }
529 if (DoLinear()) {
530 rval += EvalLinEvent( evtidx, lincoefs );
531 }
532 return rval;
533}
534
535//_______________________________________________________________________
537{
538 // evaluate linear term vind for event e: the value is clipped to [fLinDM, fLinDP] and multiplied by fLinNorm only if norm is set
539
540 Double_t val = e.GetValue(vind);
541 Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
542 if (norm) rval *= fLinNorm[vind];
543 return rval;
544}
545
546//_______________________________________________________________________
548{
549 // evaluate linear term vind for rule-map event evtidx: the value is clipped to [fLinDM, fLinDP] and multiplied by fLinNorm only if norm is set
550
551 Double_t val = (*fRuleMapEvents)[evtidx]->GetValue(vind);
552 Double_t rval = TMath::Min( fLinDP[vind], TMath::Max( fLinDM[vind], val ) );
553 if (norm) rval *= fLinNorm[vind];
554 return rval;
555}
556
557//_______________________________________________________________________
559{
560 // evaluate event linearly
561
562 Double_t rval=0;
563 for (UInt_t v=0; v<fLinTermOK.size(); v++) {
564 if (fLinTermOK[v])
565 rval += fLinCoefficients[v]*fEventLinearVal[v]*fLinNorm[v];
566 }
567 return rval;
568}
569
570//_______________________________________________________________________
571inline Double_t TMVA::RuleEnsemble::EvalLinEvent(const std::vector<Double_t> & coefs) const
572{
573 // evaluate event linearly using the given coefficients
574
575 Double_t rval=0;
576 for (UInt_t v=0; v<fLinTermOK.size(); v++) {
577 if (fLinTermOK[v])
578 rval += coefs[v]*fEventLinearVal[v]*fLinNorm[v];
579 }
580 return rval;
581}
582
583//_______________________________________________________________________
585{
586 // evaluate event linearly
587
588 SetEvent(e);
589 UpdateEventVal();
590 return EvalLinEvent();
591}
592
593//_______________________________________________________________________
595{
596 // evaluate linear term vind
597
598 SetEvent(e);
599 UpdateEventVal();
600 return GetEventLinearValNorm(vind);
601}
602
603//_______________________________________________________________________
604inline Double_t TMVA::RuleEnsemble::EvalLinEvent( const TMVA::Event& e, const std::vector<Double_t> & coefs )
605{
606 // evaluate event linearly using the given coefficients
607
608 SetEvent(e);
609 UpdateEventVal();
610 return EvalLinEvent(coefs);
611}
612
613//_______________________________________________________________________
614inline Double_t TMVA::RuleEnsemble::EvalLinEvent( UInt_t evtidx, const std::vector<Double_t> & coefs ) const
615{
616 // evaluate event linearly using the given coefficients
617 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
618 Double_t rval=0;
619 UInt_t nlin = fLinTermOK.size();
620 for (UInt_t r=0; r<nlin; r++) {
621 if (fLinTermOK[r]) {
622 rval += coefs[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
623 }
624 }
625 return rval;
626}
627
628//_______________________________________________________________________
630{
631 // evaluate rule-map event evtidx linearly using the stored coefficients (fLinCoefficients)
632 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
633 Double_t rval=0;
634 UInt_t nlin = fLinTermOK.size();
635 for (UInt_t r=0; r<nlin; r++) {
636 if (fLinTermOK[r]) {
637 rval += fLinCoefficients[r] * EvalLinEventRaw(r,*(*fRuleMapEvents)[evtidx],kTRUE);
638 }
639 }
640 return rval;
641}
642
643//_______________________________________________________________________
645{
646 // evaluate linear term vind of rule-map event evtidx using the stored coefficient fLinCoefficients[vind]
647 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
648 Double_t rval;
649 rval = fLinCoefficients[vind] * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
650 return rval;
651}
652
653//_______________________________________________________________________
655{
656 // evaluate linear term vind of rule-map event evtidx using the given coefficient coefs
657 if ((evtidx<fRuleMapInd0) || (evtidx>fRuleMapInd1)) return 0;
658 Double_t rval;
659 rval = coefs * EvalLinEventRaw(vind,*(*fRuleMapEvents)[evtidx],kTRUE);
660 return rval;
661}
662
663#endif
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:87
float xmin
Definition: THbookFile.cxx:93
float * q
Definition: THbookFile.cxx:87
float xmax
Definition: THbookFile.cxx:93
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:571
Implementation of a Decision Tree.
Definition: DecisionTree.h:64
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
J Friedman's RuleFit method.
Definition: MethodRuleFit.h:47
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Node for the BinarySearch or Decision Trees.
Definition: Node.h:56
std::vector< Double_t > fRulePBB
Definition: RuleEnsemble.h:357
Bool_t IsRuleMapOK() const
Definition: RuleEnsemble.h:297
MsgLogger & Log() const
message logger
Definition: RuleEnsemble.h:380
void SetLinCoefficients(const std::vector< Double_t > &v)
Definition: RuleEnsemble.h:108
Double_t GetLinDP(int i) const
Definition: RuleEnsemble.h:278
virtual ~RuleEnsemble()
destructor
Double_t EvalEvent() const
Definition: RuleEnsemble.h:410
const std::vector< UInt_t > & GetEventRuleMap(UInt_t evtidx) const
Definition: RuleEnsemble.h:295
Double_t GetRulePBB(int i) const
Definition: RuleEnsemble.h:285
void CalcVarImportance()
Calculates variable importance using eq (35) in RuleFit paper by Friedman et.al.
std::vector< Double_t > fLinImportance
Definition: RuleEnsemble.h:347
void SetLinCoefficient(UInt_t i, Double_t v)
Definition: RuleEnsemble.h:109
Double_t GetLinDM(int i) const
Definition: RuleEnsemble.h:277
void SetOffset(Double_t v=0.0)
Definition: RuleEnsemble.h:106
void SetImportanceRef(Double_t impref)
set reference importance
void CalcImportance()
calculate the importance of each rule
void PrintRuleGen() const
print rule generation info
void MakeRuleMap(const std::vector< const TMVA::Event * > *events=0, UInt_t ifirst=0, UInt_t ilast=0)
Makes rule map for all events.
Int_t CalcNRules(const TMVA::DecisionTree *dtree)
calculate the number of rules
std::vector< Double_t > fLinCoefficients
Definition: RuleEnsemble.h:343
void ResetCoefficients()
reset all rule coefficients
Bool_t DoRules() const
Definition: RuleEnsemble.h:252
std::vector< Double_t > fRulePBS
Definition: RuleEnsemble.h:356
void SetMsgType(EMsgType t)
std::vector< TMVA::Rule * > fRules
Definition: RuleEnsemble.h:339
Bool_t DoOnlyLinear() const
Definition: RuleEnsemble.h:254
void SetLinQuantile(Double_t q)
Definition: RuleEnsemble.h:128
Double_t CalcLinNorm(Double_t stdev)
Definition: RuleEnsemble.h:114
Double_t GetLinQuantile() const
Definition: RuleEnsemble.h:268
Double_t GetVarImportance(int i) const
Definition: RuleEnsemble.h:280
void ReadRaw(std::istream &istr)
read rule ensemble from stream
std::vector< Double_t > fRulePSS
Definition: RuleEnsemble.h:354
std::vector< Double_t > fLinDP
Definition: RuleEnsemble.h:341
Double_t EvalLinEventRaw(UInt_t vind, const Event &e, Bool_t norm) const
Definition: RuleEnsemble.h:536
const Event * fEvent
Definition: RuleEnsemble.h:366
void AddRule(const Node *node)
add a new rule to the tree
Double_t GetRulePTag(int i) const
Definition: RuleEnsemble.h:281
void ReadFromXML(void *wghtnode)
read rules from XML
void SetLinDP(const std::vector< Double_t > &xmax)
Definition: RuleEnsemble.h:111
void SetRuleFit(const RuleFit *rf)
Definition: RuleEnsemble.h:100
Double_t GetImportanceCut() const
Definition: RuleEnsemble.h:257
const Event * GetTrainingEvent(UInt_t i) const
get the training event from the rule fitter
const std::vector< const TMVA::Event * > * GetTrainingEvents() const
get list of training events from the rule fitter
Double_t GetRuleMinDist() const
Definition: RuleEnsemble.h:274
void SetLinNorm(const std::vector< Double_t > &norm)
Definition: RuleEnsemble.h:112
void SetRules(const std::vector< TMVA::Rule * > &rules)
set rules
void MakeRules(const std::vector< const TMVA::DecisionTree * > &forest)
Makes rules from the given decision tree.
void RemoveSimilarRules()
remove rules that behave similar
std::vector< Double_t > fRulePTag
Definition: RuleEnsemble.h:358
std::vector< TH1F * > fLinPDFB
Definition: RuleEnsemble.h:345
std::vector< Char_t > fEventRuleVal
Definition: RuleEnsemble.h:368
ELearningModel fLearningModel
Definition: RuleEnsemble.h:335
void FindNEndNodes(const TMVA::Node *node, Int_t &nendnodes)
find the number of leaf nodes
Double_t GetRulePBS(int i) const
Definition: RuleEnsemble.h:284
RuleEnsemble()
constructor
Double_t GetEventRuleVal(UInt_t i) const
Definition: RuleEnsemble.h:291
const std::vector< Double_t > & GetLinCoefficients() const
Definition: RuleEnsemble.h:263
Double_t GetImportanceRef() const
Definition: RuleEnsemble.h:258
const RuleFit * fRuleFit
Definition: RuleEnsemble.h:377
std::vector< TMVA::Rule * > & GetRules()
Definition: RuleEnsemble.h:262
const std::vector< Double_t > & GetVarImportance() const
Definition: RuleEnsemble.h:266
void CleanupRules()
cleanup rules
void Initialize(const RuleFit *rf)
Initializes all member variables with default values.
const std::vector< Double_t > & GetLinNorm() const
Definition: RuleEnsemble.h:264
Bool_t DoLinear() const
Definition: RuleEnsemble.h:251
std::vector< Double_t > fLinDM
Definition: RuleEnsemble.h:342
void CleanupLinear()
cleanup linear model
void RuleResponseStats()
calculate various statistics for this rule
std::vector< Double_t > fVarImportance
Definition: RuleEnsemble.h:348
UInt_t GetNLinear() const
Definition: RuleEnsemble.h:267
UInt_t GetRulesNCuts(int i) const
Definition: RuleEnsemble.h:273
Double_t GetRulePSS(int i) const
Definition: RuleEnsemble.h:282
Double_t GetRulePSB(int i) const
Definition: RuleEnsemble.h:283
void ClearLinCoefficients(Double_t val=0)
Definition: RuleEnsemble.h:118
const RuleFit * GetRuleFit() const
Definition: RuleEnsemble.h:245
Double_t EvalLinEvent() const
Definition: RuleEnsemble.h:558
void SetLinDM(const std::vector< Double_t > &xmin)
Definition: RuleEnsemble.h:110
void * AddXMLTo(void *parent) const
write rules to XML
std::vector< Double_t > fLinNorm
Definition: RuleEnsemble.h:344
void SetCoefficient(UInt_t i, Double_t v)
Definition: RuleEnsemble.h:104
const std::vector< TMVA::Rule * > & GetRulesConst() const
Definition: RuleEnsemble.h:261
Double_t GetLinNorm(int i) const
Definition: RuleEnsemble.h:276
void ClearLinNorm(Double_t val=1.0)
Definition: RuleEnsemble.h:119
const Event * GetEvent() const
Definition: RuleEnsemble.h:249
const MethodRuleFit * GetMethodRuleFit() const
Get a pointer to the original MethodRuleFit.
void ClearCoefficients(Double_t val=0)
Definition: RuleEnsemble.h:117
Double_t GetAverageRuleSigma() const
Definition: RuleEnsemble.h:290
std::vector< Double_t > fEventLinearVal
Definition: RuleEnsemble.h:369
void SetRuleMinDist(Double_t d)
Definition: RuleEnsemble.h:122
void MakeModel()
create model
void RuleStatistics()
calculate various statistics for this rule
Rule * GetRules(int i)
Definition: RuleEnsemble.h:271
void SetCoefficients(const std::vector< Double_t > &v)
set all rule coefficients
void Print() const
print function
const std::vector< Double_t > & GetLinImportance() const
Definition: RuleEnsemble.h:265
Double_t PdfRule(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for rules.
Double_t GetLinCoefficients(int i) const
Definition: RuleEnsemble.h:275
const MethodBase * GetMethodBase() const
Get a pointer to the original MethodRuleFit.
Double_t fAverageSupport
Definition: RuleEnsemble.h:350
friend std::ostream & operator<<(std::ostream &os, const RuleEnsemble &rules)
Double_t GetOffset() const
Definition: RuleEnsemble.h:259
Bool_t DoOnlyRules() const
Definition: RuleEnsemble.h:253
Double_t GetLinImportance(int i) const
Definition: RuleEnsemble.h:279
std::vector< Char_t > fLinTermOK
Definition: RuleEnsemble.h:340
Double_t GetAverageSupport() const
Definition: RuleEnsemble.h:289
void Copy(RuleEnsemble const &other)
copy function
MsgLogger * fLogger
Definition: RuleEnsemble.h:379
std::vector< std::vector< UInt_t > > fRuleMap
Definition: RuleEnsemble.h:372
Double_t CalcLinImportance()
calculate the linear importance for each rule
const std::vector< const TMVA::Event * > * fRuleMapEvents
Definition: RuleEnsemble.h:375
void SetAverageRuleSigma(Double_t v)
Definition: RuleEnsemble.h:131
Double_t CalcRuleImportance()
calculate importance of each rule
Bool_t IsLinTermOK(int i) const
Definition: RuleEnsemble.h:287
std::vector< TH1F * > fLinPDFS
Definition: RuleEnsemble.h:346
Double_t fImportanceRef
Definition: RuleEnsemble.h:349
void PrintRaw(std::ostream &os) const
write rules to stream
std::vector< Double_t > fRulePSB
Definition: RuleEnsemble.h:355
Double_t fAverageRuleSigma
Definition: RuleEnsemble.h:351
void CalcRuleSupport()
calculate the support for all rules
Double_t fImportanceCut
Definition: RuleEnsemble.h:336
void AddOffset(Double_t v)
Definition: RuleEnsemble.h:107
Double_t GetEventLinearVal(UInt_t i) const
Definition: RuleEnsemble.h:292
ELearningModel GetLearningModel() const
Definition: RuleEnsemble.h:256
Double_t PdfLinear(Double_t &nsig, Double_t &ntot) const
This function returns Pr( y = 1 | x ) for the linear terms.
Double_t CoefficientRadius()
Calculates sqrt(Sum(a_i^2)), i=1..N (NOTE do not include a0)
Bool_t DoFull() const
Definition: RuleEnsemble.h:255
void SetEvent(const Event &e)
Definition: RuleEnsemble.h:139
Double_t GetEventLinearValNorm(UInt_t i) const
Definition: RuleEnsemble.h:293
void MakeRulesFromTree(const DecisionTree *dtree)
create rules from the decision tree structure
void SetImportanceCut(Double_t minimp=0)
Definition: RuleEnsemble.h:125
const TMVA::Event * GetRuleMapEvent(UInt_t evtidx) const
Definition: RuleEnsemble.h:296
UInt_t GetNRules() const
Definition: RuleEnsemble.h:260
void MakeLinearTerms()
Make the linear terms as in eq 25, ref 2 For this the b and (1-b) quantiles are needed.
void operator=(const RuleEnsemble &other)
Definition: RuleEnsemble.h:234
const Rule * GetRulesConst(int i) const
Definition: RuleEnsemble.h:270
Rule * MakeTheRule(const Node *node)
Make a Rule from a given Node.
std::vector< Double_t > fRuleVarFrac
Definition: RuleEnsemble.h:353
void GetCoefficients(std::vector< Double_t > &v)
Retrieve all rule coefficients.
Double_t FStar() const
We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x) F(x) = FL(x) + FR(x) ,...
A class implementing various fits of rule ensembles.
Definition: RuleFit.h:45
Implementation of a rule.
Definition: Rule.h:48
create variable transformations
std::ostream & operator<<(std::ostream &os, const BinaryTree &tree)
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
Double_t Sqrt(Double_t x)
Definition: TMath.h:681
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180