Logo ROOT  
Reference Guide
RuleFitParams.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : RuleFitParams *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation *
12 * *
13 * Authors (alphabetical): *
14 * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
15 * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
16 * *
17 * Copyright (c) 2005: *
18 * CERN, Switzerland *
19 * Iowa State U. *
20 * MPI-K Heidelberg, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27/*! \class TMVA::RuleFitParams
28\ingroup TMVA
29A class doing the actual fitting of a linear model using rules as base functions.
30*/
31#include "TMVA/RuleFitParams.h"
32
33#include "TMVA/DataSetInfo.h"
34#include "TMVA/MethodRuleFit.h"
35#include "TMVA/MsgLogger.h"
36#include "TMVA/RuleEnsemble.h"
37#include "TMVA/RuleFit.h"
38#include "TMVA/Timer.h"
39#include "TMVA/Tools.h"
40#include "TMVA/Types.h"
41
42#include "TTree.h"
43#include "TMath.h"
44
45#include <iomanip>
46#include <numeric>
47#include <algorithm>
48#include <functional>
49
52
60
61////////////////////////////////////////////////////////////////////////////////
62/// constructor
63
65 : fRuleFit ( 0 )
66 , fRuleEnsemble ( 0 )
67 , fNRules ( 0 )
68 , fNLinear ( 0 )
69 , fPathIdx1 ( 0 )
70 , fPathIdx2 ( 0 )
71 , fPerfIdx1 ( 0 )
72 , fPerfIdx2 ( 0 )
73 , fGDNTauTstOK( 0 )
74 , fGDNTau ( 51 )
75 , fGDTauPrec ( 0.02 )
76 , fGDTauScan ( 1000 )
77 , fGDTauMin ( 0.0 )
78 , fGDTauMax ( 1.0 )
79 , fGDTau ( -1.0 )
80 , fGDPathStep ( 0.01 )
81 , fGDNPathSteps ( 1000 )
82 , fGDErrScale ( 1.1 )
83 , fAverageTruth( 0 )
84 , fFstarMedian ( 0 )
85 , fGDNtuple ( 0 )
86 , fNTRisk ( 0 )
87 , fNTErrorRate ( 0 )
88 , fNTNuval ( 0 )
89 , fNTCoefRad ( 0 )
90 , fNTOffset ( 0 )
91 , fNTCoeff ( 0 )
92 , fNTLinCoeff ( 0 )
93 , fsigave( 0 )
94 , fsigrms( 0 )
95 , fbkgave( 0 )
96 , fbkgrms( 0 )
97 , fLogger( new MsgLogger("RuleFit") )
98{
99 Init();
100}
101////////////////////////////////////////////////////////////////////////////////
102/// destructor
103
105{
106 if (fNTCoeff) { delete [] fNTCoeff; fNTCoeff = nullptr; }
107 if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = nullptr; }
108 delete fLogger;
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// Initializes all parameters using the RuleEnsemble and the training tree
113
115{
116 if (fRuleFit==0) return;
117 if (fRuleFit->GetMethodRuleFit()==0) {
118 Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
119 }
120 UInt_t neve = fRuleFit->GetTrainingEvents().size();
121 //
122 fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
123 fNRules = fRuleEnsemble->GetNRules();
124 fNLinear = fRuleEnsemble->GetNLinear();
125
126 //
127 // Fraction of events used for validation should be close of unity..
128 // Always selection from the END
129 //
130 UInt_t ofs;
131 fPerfIdx1 = 0;
132 if (neve>1) {
133 fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
134 }
135 else {
136 fPerfIdx2 = 0;
137 }
138 ofs = neve - fPerfIdx2 - 1;
139 fPerfIdx1 += ofs;
140 fPerfIdx2 += ofs;
141 //
142 // Fraction of events used for the path search can be allowed to be a smaller value, say 0.5
143 // Alwas select events from the BEGINNING.
144 // This means that the validation and search samples will not overlap if both fractions are <0.5.
145 //
146 fPathIdx1 = 0;
147 if (neve>1) {
148 fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
149 }
150 else {
151 fPathIdx2 = 0;
152 }
153 //
154 // summarize weights
155 //
156 fNEveEffPath = 0;;
157 for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
158 fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
159 }
160
161 fNEveEffPerf=0;
162 for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
163 fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
164 }
165 //
166 Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
167 << ", effective N(events) = " << fNEveEffPath << Endl;
168 Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
169 << ", effective N(events) = " << fNEveEffPerf << Endl;
170 //
171 if (fRuleEnsemble->DoRules())
172 Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
173 else
174 Log() << kDEBUG << "Rules are disabled " << Endl;
175
176 if (fRuleEnsemble->DoLinear())
177 Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
178 else
179 Log() << kDEBUG << "Linear terms are disabled " << Endl;
180}
181
182////////////////////////////////////////////////////////////////////////////////
183/// initializes the ntuple
184
186{
187 fGDNtuple= new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
188 fGDNtuple->Branch("risk", &fNTRisk, "risk/D");
189 fGDNtuple->Branch("error", &fNTErrorRate,"error/D");
190 fGDNtuple->Branch("nuval", &fNTNuval, "nuval/D");
191 fGDNtuple->Branch("coefrad", &fNTCoefRad, "coefrad/D");
192 fGDNtuple->Branch("offset", &fNTOffset, "offset/D");
193 //
194 fNTCoeff = (fNRules >0 ? new Double_t[fNRules] : 0);
195 fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);
196
197 for (UInt_t i=0; i<fNRules; i++) {
198 fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
199 }
200 for (UInt_t i=0; i<fNLinear; i++) {
201 fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
202 }
203}
204
205////////////////////////////////////////////////////////////////////////////////
206/// evaluate the average of each variable and f(x) in the given range
207
209 std::vector<Double_t> &avsel,
210 std::vector<Double_t> &avrul )
211{
212 UInt_t neve = ind2-ind1+1;
213 if (neve<1) {
214 Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
215 }
216 //
217 avsel.clear();
218 avrul.clear();
219 //
220 if (fNLinear>0) avsel.resize(fNLinear,0);
221 if (fNRules>0) avrul.resize(fNRules,0);
222 const std::vector<UInt_t> *eventRuleMap=0;
223 Double_t ew;
224 Double_t sumew=0;
225 //
226 // Loop over events and calculate average of linear terms (normalised) and rule response.
227 //
228 if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
229 for ( UInt_t i=ind1; i<ind2+1; i++) {
230 ew = fRuleFit->GetTrainingEventWeight(i);
231 sumew += ew;
232 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
233 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
234 }
235 // loop over rules
236 UInt_t nrules=0;
237 if (fRuleEnsemble->DoRules()) {
238 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
239 nrules = (*eventRuleMap).size();
240 }
241 for (UInt_t r=0; r<nrules; r++) {
242 avrul[(*eventRuleMap)[r]] += ew;
243 }
244 }
245 }
246 else { // MakeRuleMap() has not yet been called
247 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
248 for ( UInt_t i=ind1; i<ind2+1; i++) {
249 ew = fRuleFit->GetTrainingEventWeight(i);
250 sumew += ew;
251 // first cache rule/lin response
252 /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
253 /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
254 // loop over linear terms
255 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
256 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
257 }
258 // loop over rules
259 for (UInt_t r=0; r<fNRules; r++) {
260 avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
261 }
262 }
263 }
264 // average variable
265 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
266 avsel[sel] = avsel[sel] / sumew;
267 }
268 // average rule response, excl coeff
269 for (UInt_t r=0; r<fNRules; r++) {
270 avrul[r] = avrul[r] / sumew;
271 }
272}
273
274////////////////////////////////////////////////////////////////////////////////
275/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
276/// This is used for binary Classifications where y = {+1,-1} for (sig,bkg)
277
279{
280 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
281 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
282 //
283 return diff*diff*e.GetWeight();
284}
285
286////////////////////////////////////////////////////////////////////////////////
287/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
288/// This is used for binary Classifications where y = {+1,-1} for (sig,bkg)
289
291{
292 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
293 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
294 //
295 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
296}
297
298////////////////////////////////////////////////////////////////////////////////
299/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1)
300/// This is used for binary Classifications where y = {+1,-1} for (sig,bkg)
301
303{
304 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
305 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
306 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
307 //
308 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
309}
310
311////////////////////////////////////////////////////////////////////////////////
312/// risk assessment
313
315{
316 UInt_t neve = ind2-ind1+1;
317 if (neve<1) {
318 Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
319 }
320 Double_t rval=0;
321 //
322 // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
323 for ( UInt_t i=ind1; i<ind2+1; i++) {
324 rval += LossFunction(i);
325 }
326 rval = rval/neff;
327
328 return rval;
329}
330
331////////////////////////////////////////////////////////////////////////////////
332/// risk assessment for tau model <itau>
333
335{
336 UInt_t neve = ind2-ind1+1;
337 if (neve<1) {
338 Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
339 }
340 Double_t rval=0;
341 //
342 // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
343 for ( UInt_t i=ind1; i<ind2+1; i++) {
344 rval += LossFunction(i,itau);
345 }
346 rval = rval/neff;
347
348 return rval;
349}
350
351////////////////////////////////////////////////////////////////////////////////
352/// This is the "lasso" penalty
353/// To be used for regression.
354/// --- NOT USED ---
355
357{
358 Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
359 Double_t rval=0;
360 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
361 for (UInt_t i=0; i<fNRules; i++) {
362 rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
363 }
364 for (UInt_t i=0; i<fNLinear; i++) {
365 rval += TMath::Abs((*lincoeff)[i]);
366 }
367 return rval;
368}
369
370////////////////////////////////////////////////////////////////////////////////
371/// Initialize GD path search
372
374{
375 if (fGDNTau<2) {
376 fGDNTau = 1;
377 fGDTauScan = 0;
378 }
379 if (fGDTau<0.0) {
380 // fGDNTau = 50; already set in MethodRuleFit
381 fGDTauScan = 1000;
382 fGDTauMin = 0.0;
383 fGDTauMax = 1.0;
384 }
385 else {
386 fGDNTau = 1;
387 fGDTauScan = 0;
388 }
389 // set all taus
390 fGDTauVec.clear();
391 fGDTauVec.resize( fGDNTau );
392 if (fGDNTau==1) {
393 fGDTauVec[0] = fGDTau;
394 }
395 else {
396 // set tau vector - TODO: make a smarter choice of range in tau
397 Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
398 for (UInt_t itau=0; itau<fGDNTau; itau++) {
399 fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
400 if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
401 }
402 }
403 // initialize path search vectors
404
405 fGradVec.clear();
406 fGradVecLin.clear();
407 fGradVecTst.clear();
408 fGradVecLinTst.clear();
409 fGDErrTst.clear();
410 fGDErrTstOK.clear();
411 fGDOfsTst.clear();
412 fGDCoefTst.clear();
413 fGDCoefLinTst.clear();
414 //
415 // rules
416 //
417 fGDCoefTst.resize(fGDNTau);
418 fGradVec.resize(fNRules,0);
419 fGradVecTst.resize(fGDNTau);
420 for (UInt_t i=0; i<fGDNTau; i++) {
421 fGradVecTst[i].resize(fNRules,0);
422 fGDCoefTst[i].resize(fNRules,0);
423 }
424 //
425 // linear terms
426 //
427 fGDCoefLinTst.resize(fGDNTau);
428 fGradVecLin.resize(fNLinear,0);
429 fGradVecLinTst.resize(fGDNTau);
430 for (UInt_t i=0; i<fGDNTau; i++) {
431 fGradVecLinTst[i].resize(fNLinear,0);
432 fGDCoefLinTst[i].resize(fNLinear,0);
433 }
434 //
435 // error, coefs etc
436 //
437 fGDErrTst.resize(fGDNTau,0);
438 fGDErrTstOK.resize(fGDNTau,kTRUE);
439 fGDOfsTst.resize(fGDNTau,0);
440 fGDNTauTstOK = fGDNTau;
441 //
442 // calculate average selectors and rule responses for the path sample size
443 //
444}
445
446////////////////////////////////////////////////////////////////////////////////
447/// This finds the cutoff parameter tau by scanning several different paths
448
450{
451 if (fGDNTau<2) return 0;
452 if (fGDTauScan==0) return 0;
453
454 if (fGDOfsTst.size()<1)
455 Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
456 //
457 Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
458 //
459 // Find how many points to scan and how often to calculate the error
460 UInt_t nscan = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
461 UInt_t netst = std::min(nscan,UInt_t(100));
462 UInt_t nscanned=0;
463 //
464 //--------------------
465 // loop over the paths
466 //--------------------
467 // The number of MAXIMUM loops is given by nscan.
468 // At each loop, the paths being far away from the minimum
469 // are rejected. Hence at each check (every netst events), the number
470 // of paths searched will be reduced.
471 // The maximum 'distance' from the minimum error rate is
472 // 1 sigma. See RiskPerfTst() for details.
473 //
474 Bool_t doloop=kTRUE;
475 UInt_t ip=0;
476 UInt_t itauMin=0;
477 Timer timer( nscan, "RuleFit" );
478 while (doloop) {
479 // make gradvec
480 MakeTstGradientVector();
481 // update coefs
482 UpdateTstCoefficients();
483 // estimate error and do the sum
484 // do this at index=0, netst-1, 2*netst-1 ...
485 nscanned++;
486 if ( (ip==0) || ((ip+1)%netst==0) ) {
487 // ErrorRateRocTst( );
488 itauMin = RiskPerfTst();
489 Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
490 << " => error rate = " << fGDErrTst[itauMin] << Endl;
491 }
492 ip++;
493 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
495 if (Log().GetMinType()>kVERBOSE)
496 timer.DrawProgressBar(ip);
497 }
498 //
499 // Set tau and coefs
500 // Downscale tau slightly in order to avoid numerical problems
501 //
502 if (nscanned==0) {
503 Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
504 }
505 fGDTau = fGDTauVec[itauMin];
506 fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
507 fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
508 fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
509 Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
510 << " after " << timer.GetElapsedTime() << " " << Endl;
511
512 return nscan;
513}
514
515////////////////////////////////////////////////////////////////////////////////
516/// The following finds the gradient directed path in parameter space.
517/// More work is needed... FT, 24/9/2006
518///
519/// The algorithm is currently as follows (if not otherwise stated, the sample
520/// used below is [fPathIdx1,fPathIdx2]):
521///
522/// 1. Set offset to -average(y(true)) and all coefs=0 => average of F(x)==0
523/// 2. FindGDTau() : start scanning using several paths defined by different tau
524/// choose the tau yielding the best path
525/// 3. start the scanning the chosen path
526/// 4. check error rate at a given frequency
527/// data used for check: [fPerfIdx1,fPerfIdx2]
528/// 5. stop when either of the following conditions are fullfilled:
529/// 1. loop index==fGDNPathSteps
530/// 2. error > fGDErrScale*errmin
531/// 3. only in DEBUG mode: risk is not monotonously decreasing
532///
533/// The algorithm will warn if:
534/// 1. the error rate was still decreasing when loop finished -> increase fGDNPathSteps!
535/// 2. minimum was found at an early stage -> decrease fGDPathStep
536/// 3. DEBUG: risk > previous risk -> entered chaotic region (regularization is too small)
537
539{
540 Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
541 << Endl;
542 Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
543 Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;
544
545 // check if debug mode
546 const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
547 const Bool_t isDebug = (Log().GetMinType()<=kDEBUG);
548
549 // init GD parameters and clear coeff vectors
550 InitGD();
551
552 // evaluate average response of rules/linear terms (with event weights)
553 EvaluateAveragePath();
554 EvaluateAveragePerf();
555
556 // initial estimate; all other a(i) are zero
557 Log() << kVERBOSE << "Creating GD path" << Endl;
558 Log() << kVERBOSE << " N(steps) = " << fGDNPathSteps << Endl;
559 Log() << kVERBOSE << " step = " << fGDPathStep << Endl;
560 Log() << kVERBOSE << " N(tau) = " << fGDNTau << Endl;
561 Log() << kVERBOSE << " N(tau steps) = " << fGDTauScan << Endl;
562 Log() << kVERBOSE << " tau range = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;
563
564 // init ntuple
565 if (isDebug) InitNtuple();
566
567 // DEBUG: risk scan
568 Int_t nbadrisk=0; // number of points where risk(i+1)>risk(i)
569 Double_t trisk=0; // time per risk evaluation
570 Double_t strisk=0; // total time
571 Double_t rprev=1e32; // previous risk
572
573 // parameters set at point with min error
574 Double_t errmin=1e32; // min error
575 // Double_t riskMin=0; // risk
576 Int_t indMin=-1; // index
577 std::vector<Double_t> coefsMin; // rule coefs
578 std::vector<Double_t> lincoefsMin; // linear coefs
579 Double_t offsetMin; // offset
580
581
582 // DEBUG: timing
583 clock_t t0=0;
584 Double_t tgradvec;
585 Double_t tupgrade;
586 Double_t tperf;
587 Double_t stgradvec=0;
588 Double_t stupgrade=0;
589 Double_t stperf=0;
590
591 // linear regression to estimate slope of error rate evolution
592 const UInt_t npreg=5;
593 std::vector<Double_t> valx;
594 std::vector<Double_t> valy;
595 std::vector<Double_t> valxy;
596
597 // loop related
598 Bool_t docheck; // true if an error rate check is to be done
599 Int_t iloop=0; // loop index
600 Bool_t found=kFALSE; // true if minimum is found
601 Bool_t riskFlat=kFALSE; // DEBUG: flag is true if risk evolution behaves badly
602 Bool_t done = kFALSE; // flag that the scan is done
603
604 // calculate how often to check error rate
605 int imod = fGDNPathSteps/100;
606 if (imod<100) imod = std::min(100,fGDNPathSteps);
607 if (imod>100) imod=100;
608
609 // reset coefficients
610 fAverageTruth = -CalcAverageTruth();
611 offsetMin = fAverageTruth;
612 fRuleEnsemble->SetOffset(offsetMin);
613 fRuleEnsemble->ClearCoefficients(0);
614 fRuleEnsemble->ClearLinCoefficients(0);
615 for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
616 fGDOfsTst[i] = offsetMin;
617 }
618 Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;
619
620 // find the best tau - returns the number of steps performed in scan
621 Int_t nprescan = FindGDTau();
622
623 //
624 //
625 // calculate F*
626 //
627 // CalcFStar(fPerfIdx1, fPerfIdx2);
628 //
629
630 // set some ntuple values
631 fNTRisk = rprev;
632 fNTCoefRad = -1.0;
633 fNTErrorRate = 0;
634
635 // a local flag indicating for what reason the search was stopped
636 Int_t stopCondition=0;
637
638 Log() << kINFO << "Fitting model..." << Endl;
639 // start loop with timer
640 Timer timer( fGDNPathSteps, "RuleFit" );
641 Log() << kWARNING;
642 while (!done) {
643 // Make gradient vector (eq 44, ref 1)
644 if (isVerbose) t0 = clock();
645 MakeGradientVector();
646 if (isVerbose) {
647 tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
648 stgradvec += tgradvec;
649 }
650
651 // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
652 if (isVerbose) t0 = clock();
653 UpdateCoefficients();
654 if (isVerbose) {
655 tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
656 stupgrade += tupgrade;
657 }
658
659 // don't check error rate every loop
660 docheck = ((iloop==0) ||((iloop+1)%imod==0));
661
662 if (docheck) {
663 // draw progressbar only if not debug
664 if (!isVerbose)
665 timer.DrawProgressBar(iloop);
666 fNTNuval = Double_t(iloop)*fGDPathStep;
667 fNTRisk = 0.0;
668
669 // check risk evolution
670
671 if (isDebug) FillCoefficients();
672 fNTCoefRad = fRuleEnsemble->CoefficientRadius();
673
674 // calculate risk
675 t0 = clock();
676 fNTRisk = RiskPath();
677 trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
678 strisk += trisk;
679 //
680 // Check for an increase in risk.
681 // Such an increase would imply that the regularization is too small.
682 // Stop the iteration if this happens.
683 //
684 if (fNTRisk>=rprev) {
685 if (fNTRisk>rprev) {
686 nbadrisk++;
687 Log() << "Risk(i+1)>=Risk(i) in path" << Endl;
688 riskFlat=(nbadrisk>3);
689 if (riskFlat) {
690 Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
691 Log() << "--- STOPPING MINIMISATION ---" << Endl;
692 Log() << "This may be OK if minimum is already found" << Endl;
693 }
694 }
695 }
696 rprev = fNTRisk;
697 //
698 // Estimate the error rate using cross validation
699 // Well, not quite full cross validation since we only
700 // use ONE model.
701 //
702 if (isVerbose) t0 = clock();
703 fNTErrorRate = 0;
704
705 // Check error rate
706 Double_t errroc;//= ErrorRateRoc();
707 Double_t riskPerf = RiskPerf();
708 // Double_t optimism = Optimism();
709 //
710 errroc = riskPerf;
711 //
712 fNTErrorRate = errroc;
713 //
714 if (isVerbose) {
715 tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
716 stperf +=tperf;
717 }
718 //
719 // Always take the last min.
720 // For each step the risk is reduced.
721 //
722 if (fNTErrorRate<=errmin) {
723 errmin = fNTErrorRate;
724 // riskMin = fNTRisk;
725 indMin = iloop;
726 fRuleEnsemble->GetCoefficients(coefsMin);
727 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
728 offsetMin = fRuleEnsemble->GetOffset();
729 }
730 if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
731 //
732 // check slope of last couple of points
733 //
734 if (valx.size()==npreg) {
735 valx.erase(valx.begin());
736 valy.erase(valy.begin());
737 valxy.erase(valxy.begin());
738 }
739 valx.push_back(fNTNuval);
740 valy.push_back(fNTErrorRate);
741 valxy.push_back(fNTErrorRate*fNTNuval);
742
744
745 //
746 if (isDebug) fGDNtuple->Fill();
747 if (isVerbose) {
748 Log() << kVERBOSE << "ParamsIRE : "
749 << std::setw(10)
750 << Form("%8d",iloop+1) << " "
751 << Form("%4.4f",fNTRisk) << " "
752 << Form("%4.4f",riskPerf) << " "
753 << Form("%4.4f",fNTRisk+riskPerf) << " "
754 // << Form("%4.4f",fsigave+fbkgave) << " "
755 // << Form("%4.4f",fsigave) << " "
756 // << Form("%4.4f",fsigrms) << " "
757 // << Form("%4.4f",fbkgave) << " "
758 // << Form("%4.4f",fbkgrms) << " "
759
760 // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
761 << Endl;
762 }
763 }
764 iloop++;
765 // Stop iteration under various conditions
766 // * The condition R(i+1)<R(i) is no longer true (when then implicit regularization is too weak)
767 // * If the current error estimate is > factor*errmin (factor = 1.1)
768 // * We have reach the last step...
769 Bool_t endOfLoop = (iloop==fGDNPathSteps);
770 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
771 if (riskFlat) {
772 stopCondition = 1;
773 }
774 else if (endOfLoop) {
775 stopCondition = 2;
776 }
777 if (indMin<0) {
778 Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless;)" << Endl;
779 errmin = fNTErrorRate;
780 // riskMin = fNTRisk;
781 indMin = iloop;
782 fRuleEnsemble->GetCoefficients(coefsMin);
783 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
784 offsetMin = fRuleEnsemble->GetOffset();
785 }
786 found = kTRUE;
787 }
788 done = (found);
789 }
790 Log() << Endl;
791 Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
792 Log() << kINFO << "----------------------------------------------------------------" << Endl;
793 Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
794 Log() << kINFO << "Reason for ending loop: ";
795 switch (stopCondition) {
796 case 0:
797 Log() << kINFO << "clear minima found";
798 break;
799 case 1:
800 Log() << kINFO << "chaotic behaviour of risk";
801 break;
802 case 2:
803 Log() << kINFO << "end of loop reached";
804 break;
805 default:
806 Log() << kINFO << "unknown!";
807 break;
808 }
809 Log() << Endl;
810 Log() << kINFO << "----------------------------------------------------------------" << Endl;
811
812 // check if early minima - might be an indication of too large stepsize
813 if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
814 Log() << kWARNING << "Reached minimum early in the search" << Endl;
815 Log() << "Check results and maybe decrease GDStep size" << Endl;
816 }
817 //
818 // quick check of the sign of the slope for the last npreg points
819 //
820 Double_t sumx = std::accumulate( valx.begin(), valx.end(), Double_t() );
821 Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
822 Double_t sumy = std::accumulate( valy.begin(), valy.end(), Double_t() );
823 Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
824 if (slope<0) {
825 Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
826 Log() << kINFO << "Increase number of steps (GDNSteps)." << Endl;
827 }
828 //
829 // set coefficients
830 //
831 if (found) {
832 fRuleEnsemble->SetCoefficients( coefsMin );
833 fRuleEnsemble->SetLinCoefficients( lincoefsMin );
834 fRuleEnsemble->SetOffset( offsetMin );
835 }
836 else {
837 Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
838 }
839
840 //
841 // print timing info (VERBOSE mode)
842 //
843 if (isVerbose) {
844 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
845 Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
846 Log() << kVERBOSE << " gradvec = " << 1000*stgradvec/iloop << Endl;
847 Log() << kVERBOSE << " upgrade = " << 1000*stupgrade/iloop << Endl;
848 Log() << kVERBOSE << " risk = " << 1000*strisk/iloop << Endl;
849 Log() << kVERBOSE << " perf = " << 1000*stperf/iloop << Endl;
850 Log() << kVERBOSE << " loop = " << 1000*stloop/iloop << Endl;
851 //
852 Log() << kVERBOSE << " GDInit = " << 1000*gGDInit/iloop << Endl;
853 Log() << kVERBOSE << " GDPtr = " << 1000*gGDPtr/iloop << Endl;
854 Log() << kVERBOSE << " GDEval = " << 1000*gGDEval/iloop << Endl;
855 Log() << kVERBOSE << " GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
856 Log() << kVERBOSE << " GDNorm = " << 1000*gGDNorm/iloop << Endl;
857 Log() << kVERBOSE << " GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
858 Log() << kVERBOSE << " GDLinLoop = " << 1000*gGDLinLoop/iloop << Endl;
859 }
860 //
861 // write ntuple (DEBUG)
862 if (isDebug) fGDNtuple->Write();
863}
864
865////////////////////////////////////////////////////////////////////////////////
866/// helper function to store the rule coefficients in local arrays
867
869{
870 fNTOffset = fRuleEnsemble->GetOffset();
871 //
872 for (UInt_t i=0; i<fNRules; i++) {
873 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
874 }
875 for (UInt_t i=0; i<fNLinear; i++) {
876 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
877 }
878}
879
880////////////////////////////////////////////////////////////////////////////////
881/// Estimates F* (optimum scoring function) for all events for the given sets.
882/// The result is used in ErrorRateReg().
883/// --- NOT USED ---
884
886{
887 Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
888 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
889 if (neve<1) {
890 Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
891 return;
892 }
893 //
894 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
895 //
896 fFstar.clear();
897 std::vector<Double_t> fstarSorted;
898 Double_t fstarVal;
899 // loop over all events and estimate F* for each event
900 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
901 const Event& e = *(*events)[i];
902 fstarVal = fRuleEnsemble->FStar(e);
903 fFstar.push_back(fstarVal);
904 fstarSorted.push_back(fstarVal);
905 if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
906 }
907 // sort F* and find median
908 std::sort( fstarSorted.begin(), fstarSorted.end() );
909 UInt_t ind = neve/2;
910 if (neve&1) { // odd number of events
911 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
912 }
913 else { // even
914 fFstarMedian = fstarSorted[ind];
915 }
916}
917
918////////////////////////////////////////////////////////////////////////////////
919/// implementation of eq. 7.17 in Hastie,Tibshirani & Friedman book
920/// this is the covariance between the estimated response yhat and the
921/// true value y.
922/// NOT REALLY SURE IF THIS IS CORRECT!
923/// --- THIS IS NOT USED ---
924
926{
927 Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
928 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
929 if (neve<1) {
930 Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
931 }
932 //
933 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
934 //
935 Double_t sumy=0;
936 Double_t sumyhat=0;
937 Double_t sumyhaty=0;
938 Double_t sumw2=0;
939 Double_t sumw=0;
940 Double_t yhat;
941 Double_t y;
942 Double_t w;
943 //
944 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
945 const Event& e = *(*events)[i];
946 yhat = fRuleEnsemble->EvalEvent(i); // evaluated using the model
947 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
948 w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf; // the weight, reweighted such that sum=1
949 sumy += w*y;
950 sumyhat += w*yhat;
951 sumyhaty += w*yhat*y;
952 sumw2 += w*w;
953 sumw += w;
954 }
955 Double_t div = 1.0-sumw2;
956 Double_t cov = sumyhaty - sumyhat*sumy;
957 return 2.0*cov/div;
958}
959
960////////////////////////////////////////////////////////////////////////////////
961/// Estimates the error rate with the current set of parameters
962/// This code is pretty messy at the moment.
963/// Cleanup is needed.
964/// -- NOT USED ---
965
967{
968 Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
969 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
970 if (neve<1) {
971 Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
972 }
973 if (fFstar.size()!=neve) {
974 Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
975 << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
976 }
977 //
978 Double_t sF;
979 //
980 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
981 //
982 Double_t sumdf = 0;
983 Double_t sumdfmed = 0;
984 //
985 // A bit messy here.
986 // I believe the binary error classification is appropriate here.
987 // The problem is stability.
988 //
989 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
990 const Event& e = *(*events)[i];
991 sF = fRuleEnsemble->EvalEvent( e );
992 // scaled abs error, eq 20 in RuleFit paper
993 sumdf += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
994 sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
995 }
996 // scaled abs error, eq 20
997 // This error (df) is large - need to think on how to compensate...
998 //
999 return sumdf/sumdfmed;
1000}
1001
1002////////////////////////////////////////////////////////////////////////////////
1003///
1004/// Estimates the error rate with the current set of parameters
1005/// It uses a binary estimate of (y-F*(x))
1006/// (y-F*(x)) = (Num of events where sign(F)!=sign(y))/Neve
1007/// y = {+1 if event is signal, -1 otherwise}
1008/// --- NOT USED ---
1009
1011{
1012 Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
1013 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1014 if (neve<1) {
1015 Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
1016 }
1017 //
1018 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1019 //
1020 Double_t sumdfbin = 0;
1021 Double_t dneve = Double_t(neve);
1022 Int_t signF, signy;
1023 Double_t sF;
1024 //
1025 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1026 const Event& e = *(*events)[i];
1027 sF = fRuleEnsemble->EvalEvent( e );
1028 // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
1029 signF = (sF>0 ? +1:-1);
1030 // signy = (sFStar>0 ? +1:-1);
1031 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
1032 sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
1033 }
1034 Double_t f = sumdfbin/dneve;
1035 // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
1036 return f;
1037}
1038
1039////////////////////////////////////////////////////////////////////////////////
1040/// Estimates the error rate with the current set of parameters.
1041/// It calculates the area under the bkg rejection vs signal efficiency curve.
1042/// The value returned is 1-area.
1043
1044Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
1045 std::vector<Double_t> & sFbkg )
1046
1047{
1048 std::sort(sFsig.begin(), sFsig.end());
1049 std::sort(sFbkg.begin(), sFbkg.end());
1050 const Double_t minsig = sFsig.front();
1051 const Double_t minbkg = sFbkg.front();
1052 const Double_t maxsig = sFsig.back();
1053 const Double_t maxbkg = sFbkg.back();
1054 const Double_t minf = std::min(minsig,minbkg);
1055 const Double_t maxf = std::max(maxsig,maxbkg);
1056 const Int_t nsig = Int_t(sFsig.size());
1057 const Int_t nbkg = Int_t(sFbkg.size());
1058 const Int_t np = std::min((nsig+nbkg)/4,50);
1059 const Double_t df = (maxf-minf)/(np-1);
1060 //
1061 // calculate area under rejection/efficiency curve
1062 //
1063 Double_t fcut;
1064 std::vector<Double_t>::const_iterator indit;
1065 Int_t nrbkg;
1066 Int_t nesig;
1067 Int_t pnesig=0;
1068 Double_t rejb=0;
1069 Double_t effs=1.0;
1070 Double_t prejb=0;
1071 Double_t peffs=1.0;
1072 // Double_t drejb;
1073 Double_t deffs;
1074 Double_t area=0;
1075 Int_t npok=0;
1076 //
1077 // loop over range of F [minf,maxf]
1078 //
1079 for (Int_t i=0; i<np; i++) {
1080 fcut = minf + df*Double_t(i);
1081 indit = std::find_if(sFsig.begin(), sFsig.end(),
1082 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1083 nesig = sFsig.end()-indit; // number of sig accepted with F>cut
1084 if (TMath::Abs(pnesig-nesig)>0) {
1085 npok++;
1086 indit = std::find_if(sFbkg.begin(), sFbkg.end(),
1087 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1088 nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
1089 rejb = Double_t(nrbkg)/Double_t(nbkg);
1090 effs = Double_t(nesig)/Double_t(nsig);
1091 //
1092 // drejb = rejb-prejb;
1093 deffs = effs-peffs;
1094 area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
1095 prejb = rejb;
1096 peffs = effs;
1097 }
1098 pnesig = nesig;
1099 }
1100 area += 0.5*(1+rejb)*effs; // extrapolate to the end point
1101
1102 return (1.0-area);
1103}
1104
1105////////////////////////////////////////////////////////////////////////////////
1106/// Estimates the error rate with the current set of parameters.
1107/// It calculates the area under the bkg rejection vs signal efficiency curve.
1108/// The value returned is 1-area.
1109/// This works but is less efficient than calculating the Risk using RiskPerf().
1110
1112{
1113 Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
1114 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1115 if (neve<1) {
1116 Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
1117 }
1118 //
1119 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1120 //
1121 Double_t sF;
1122 //
1123 std::vector<Double_t> sFsig;
1124 std::vector<Double_t> sFbkg;
1125 Double_t sumfsig=0;
1126 Double_t sumfbkg=0;
1127 Double_t sumf2sig=0;
1128 Double_t sumf2bkg=0;
1129 //
1130 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1131 const Event& e = *(*events)[i];
1132 sF = fRuleEnsemble->EvalEvent(i);// * fRuleFit->GetTrainingEventWeight(i);
1133 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
1134 sFsig.push_back(sF);
1135 sumfsig +=sF;
1136 sumf2sig +=sF*sF;
1137 }
1138 else {
1139 sFbkg.push_back(sF);
1140 sumfbkg +=sF;
1141 sumf2bkg +=sF*sF;
1142 }
1143 }
1144 fsigave = sumfsig/sFsig.size();
1145 fbkgave = sumfbkg/sFbkg.size();
1146 fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
1147 fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
1148 //
1149 return ErrorRateRocRaw( sFsig, sFbkg );
1150}
1151
1152////////////////////////////////////////////////////////////////////////////////
1153/// Estimates the error rate with the current set of parameters.
1154/// It calculates the area under the bkg rejection vs signal efficiency curve.
1155/// The value returned is 1-area.
1156///
1157/// See comment under ErrorRateRoc().
1158
1160{
1161 Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
1162 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1163 if (neve<1) {
1164 Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
1165 return;
1166 }
1167 //
1168 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1169 //
1170 // std::vector<Double_t> sF;
1171 Double_t sF;
1172 std::vector< std::vector<Double_t> > sFsig;
1173 std::vector< std::vector<Double_t> > sFbkg;
1174 //
1175 sFsig.resize( fGDNTau );
1176 sFbkg.resize( fGDNTau );
1177 // sF.resize( fGDNTau );
1178
1179 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1180 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1181 // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1182 // else sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1183 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1184 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1185 sFsig[itau].push_back(sF);
1186 }
1187 else {
1188 sFbkg[itau].push_back(sF);
1189 }
1190 }
1191 }
1192 Double_t err;
1193
1194 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1195 err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
1196 fGDErrTst[itau] = err;
1197 }
1198}
1199
1200////////////////////////////////////////////////////////////////////////////////
1201/// Estimates the error rate with the current set of parameters.
1202/// using the <Perf> subsample.
1203/// Return the tau index giving the lowest error
1204
1206{
1207 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1208 if (neve<1) {
1209 Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
1210 return 0;
1211 }
1212 //
1213 Double_t sumx = 0;
1214 Double_t sumx2 = 0;
1215 Double_t maxx = -100.0;
1216 Double_t minx = 1e30;
1217 UInt_t itaumin = 0;
1218 UInt_t nok=0;
1219 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1220 if (fGDErrTstOK[itau]) {
1221 nok++;
1222 fGDErrTst[itau] = RiskPerf(itau);
1223 sumx += fGDErrTst[itau];
1224 sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
1225 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1226 if (fGDErrTst[itau]<minx) {
1227 minx=fGDErrTst[itau];
1228 itaumin = itau;
1229 }
1230 }
1231 }
1232 Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
1233 Double_t maxacc = minx+sigx;
1234 //
1235 if (nok>0) {
1236 nok = 0;
1237 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1238 if (fGDErrTstOK[itau]) {
1239 if (fGDErrTst[itau] > maxacc) {
1240 fGDErrTstOK[itau] = kFALSE;
1241 }
1242 else {
1243 nok++;
1244 }
1245 }
1246 }
1247 }
1248 fGDNTauTstOK = nok;
1249 Log() << kVERBOSE << "TAU: "
1250 << itaumin << " "
1251 << nok << " "
1252 << minx << " "
1253 << maxx << " "
1254 << sigx << Endl;
1255 //
1256 return itaumin;
1257}
1258
1259////////////////////////////////////////////////////////////////////////////////
1260/// make test gradient vector for all tau
1261/// same algorithm as MakeGradientVector()
1262
1264{
1265 UInt_t neve = fPathIdx1-fPathIdx2+1;
1266 if (neve<1) {
1267 Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
1268 return;
1269 }
1270 //
1271 Double_t norm = 2.0/fNEveEffPath;
1272 //
1273 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1274
1275 // Clear gradient vectors
1276 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1277 if (fGDErrTstOK[itau]) {
1278 for (UInt_t ir=0; ir<fNRules; ir++) {
1279 fGradVecTst[itau][ir]=0;
1280 }
1281 for (UInt_t il=0; il<fNLinear; il++) {
1282 fGradVecLinTst[itau][il]=0;
1283 }
1284 }
1285 }
1286 //
1287 // Double_t val; // temp store
1288 Double_t sF; // score function value
1289 Double_t r; // eq 35, ref 1
1290 Double_t y; // true score (+1 or -1)
1291 const std::vector<UInt_t> *eventRuleMap=0;
1292 UInt_t rind;
1293 //
1294 // Loop over all events
1295 //
1296 UInt_t nsfok=0;
1297 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1298 const Event *e = (*events)[i];
1299 UInt_t nrules=0;
1300 if (fRuleEnsemble->DoRules()) {
1301 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1302 nrules = (*eventRuleMap).size();
1303 }
1304 for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
1305 // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1306 // else sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1307 if (fGDErrTstOK[itau]) {
1308 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1309 if (TMath::Abs(sF)<1.0) {
1310 nsfok++;
1311 r = 0;
1312 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1313 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1314 // rule gradient vector
1315 for (UInt_t ir=0; ir<nrules; ir++) {
1316 rind = (*eventRuleMap)[ir];
1317 fGradVecTst[itau][rind] += r;
1318 }
1319 // linear terms
1320 for (UInt_t il=0; il<fNLinear; il++) {
1321 fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
1322 }
1323 } // if (TMath::Abs(F)<xxx)
1324 }
1325 }
1326 }
1327}
1328
1329////////////////////////////////////////////////////////////////////////////////
1330/// Establish maximum gradient for rules, linear terms and the offset
1331/// for all taus TODO: do not need index range!
1332
1334{
1335 Double_t maxr, maxl, cthresh, val;
1336 // loop over all taus
1337 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1338 if (fGDErrTstOK[itau]) {
1339 // find max gradient
1340 maxr = ( (fNRules>0 ?
1341 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
1342 maxl = ( (fNLinear>0 ?
1343 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );
1344
1345 // Use the maximum as a threshold
1346 Double_t maxv = (maxr>maxl ? maxr:maxl);
1347 cthresh = maxv * fGDTauVec[itau];
1348
1349 // Add to offset, if gradient is large enough:
1350 // Loop over the gradient vector and move to next set of coefficients
1351 // size of GradVec (and GradVecLin) should be 0 if learner is disabled
1352 //
1353 // Step-size is divided by 10 when looking for the best path.
1354 //
1355 if (maxv>0) {
1356 const Double_t stepScale=1.0;
1357 for (UInt_t i=0; i<fNRules; i++) {
1358 val = fGradVecTst[itau][i];
1359
1360 if (TMath::Abs(val)>=cthresh) {
1361 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
1362 }
1363 }
1364 // Loop over the gradient vector for the linear part and move to next set of coefficients
1365 for (UInt_t i=0; i<fNLinear; i++) {
1366 val = fGradVecLinTst[itau][i];
1367 if (TMath::Abs(val)>=cthresh) {
1368 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
1369 }
1370 }
1371 }
1372 }
1373 }
1374 // set the offset - should be outside the itau loop!
1375 CalcTstAverageResponse();
1376}
1377
1378////////////////////////////////////////////////////////////////////////////////
1379/// make gradient vector
1380
1382{
1383 clock_t t0;
1384 // clock_t t10;
1385 t0 = clock();
1386 //
1387 UInt_t neve = fPathIdx2-fPathIdx1+1;
1388 if (neve<1) {
1389 Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
1390 return;
1391 }
1392 //
1393 const Double_t norm = 2.0/fNEveEffPath;
1394 //
1395 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1396
1397 // Clear gradient vectors
1398 for (UInt_t ir=0; ir<fNRules; ir++) {
1399 fGradVec[ir]=0;
1400 }
1401 for (UInt_t il=0; il<fNLinear; il++) {
1402 fGradVecLin[il]=0;
1403 }
1404 //
1405 // Double_t val; // temp store
1406 Double_t sF; // score function value
1407 Double_t r; // eq 35, ref 1
1408 Double_t y; // true score (+1 or -1)
1409 const std::vector<UInt_t> *eventRuleMap=0;
1410 UInt_t rind;
1411 //
1412 gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1413
1414 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1415 const Event *e = (*events)[i];
1416
1417 // t0 = clock(); //DEB
1418 sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
1419 // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1420 if (TMath::Abs(sF)<1.0) {
1421 UInt_t nrules=0;
1422 if (fRuleEnsemble->DoRules()) {
1423 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1424 nrules = (*eventRuleMap).size();
1425 }
1426 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1427 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1428 // rule gradient vector
1429 for (UInt_t ir=0; ir<nrules; ir++) {
1430 rind = (*eventRuleMap)[ir];
1431 fGradVec[rind] += r;
1432 }
1433 // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1434 // linear terms
1435 // t0 = clock(); //DEB
1436 for (UInt_t il=0; il<fNLinear; il++) {
1437 fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
1438 }
1439 // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1440 } // if (TMath::Abs(F)<xxx)
1441 }
1442}
1443
1444////////////////////////////////////////////////////////////////////////////////
1445/// Establish maximum gradient for rules, linear terms and the offset
1446
1448{
1449 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
1450 TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
1451 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
1452 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
1453 // Use the maximum as a threshold
1454 Double_t maxv = (maxr>maxl ? maxr:maxl);
1455 Double_t cthresh = maxv * fGDTau;
1456
1457 Double_t useRThresh;
1458 Double_t useLThresh;
1459 //
1460 // Choose thresholds.
1461 //
1462 useRThresh = cthresh;
1463 useLThresh = cthresh;
1464
1465 Double_t gval, lval, coef, lcoef;
1466
1467 // Add to offset, if gradient is large enough:
1468 // Loop over the gradient vector and move to next set of coefficients
1469 // size of GradVec (and GradVecLin) should be 0 if learner is disabled
1470 if (maxv>0) {
1471 for (UInt_t i=0; i<fGradVec.size(); i++) {
1472 gval = fGradVec[i];
1473 if (TMath::Abs(gval)>=useRThresh) {
1474 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
1475 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
1476 }
1477 }
1478
1479 // Loop over the gradient vector for the linear part and move to next set of coefficients
1480 for (UInt_t i=0; i<fGradVecLin.size(); i++) {
1481 lval = fGradVecLin[i];
1482 if (TMath::Abs(lval)>=useLThresh) {
1483 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
1484 fRuleEnsemble->SetLinCoefficient(i,lcoef);
1485 }
1486 }
1487 // Set the offset
1488 Double_t offset = CalcAverageResponse();
1489 fRuleEnsemble->SetOffset( offset );
1490 }
1491}
1492
1493////////////////////////////////////////////////////////////////////////////////
1494/// calc average response for all test paths - TODO: see comment under CalcAverageResponse()
1495/// note that 0 offset is used
1496
1498{
1499 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1500 if (fGDErrTstOK[itau]) {
1501 fGDOfsTst[itau] = 0;
1502 for (UInt_t s=0; s<fNLinear; s++) {
1503 fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
1504 }
1505 for (UInt_t r=0; r<fNRules; r++) {
1506 fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
1507 }
1508 }
1509 }
1510 //
1511}
1512
1513////////////////////////////////////////////////////////////////////////////////
1514/// calculate the average response - TODO : rewrite bad dependancy on EvaluateAverage() !
1515///
1516/// note that 0 offset is used
1517
1519{
1520 Double_t ofs = 0;
1521 for (UInt_t s=0; s<fNLinear; s++) {
1522 ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
1523 }
1524 for (UInt_t r=0; r<fNRules; r++) {
1525 ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
1526 }
1527 return ofs;
1528}
1529
1530////////////////////////////////////////////////////////////////////////////////
1531/// calculate the average truth
1532
1534{
1535 if (fPathIdx2<=fPathIdx1) {
1536 Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
1537 return 0;
1538 }
1539 Double_t sum=0;
1540 Double_t ensig=0;
1541 Double_t enbkg=0;
1542 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1543 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1544 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
1545 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1546 else enbkg += ew;
1547 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1548 }
1549 Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;
1550
1551 return sum/fNEveEffPath;
1552}
1553
1554////////////////////////////////////////////////////////////////////////////////
1555
1557 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
1558}
1559
1560////////////////////////////////////////////////////////////////////////////////
1561
1563 fLogger->SetMinType(t);
1564}
ROOT::R::TRInterface & r
Definition: Object.C:4
#define f(i)
Definition: RSha256.hxx:104
#define h(i)
Definition: RSha256.hxx:106
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:45
unsigned int UInt_t
Definition: RtypesCore.h:46
const Bool_t kFALSE
Definition: RtypesCore.h:101
bool Bool_t
Definition: RtypesCore.h:63
double Double_t
Definition: RtypesCore.h:59
const Bool_t kTRUE
Definition: RtypesCore.h:100
Double_t gGDNorm
Double_t gGDLinLoop
Double_t gGDInit
Double_t gGDEval
Bool_t gFIRSTTST
Double_t gGDPtr
Double_t gGDEvalRule
Bool_t gFIRSTORG
Double_t gGDRuleLoop
char * Form(const char *fmt,...)
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:57
void CalcTstAverageResponse()
calc average response for all test paths - TODO: see comment under CalcAverageResponse() note that 0 ...
void MakeGDPath()
The following finds the gradient directed path in parameter space.
void EvaluateAverage(UInt_t ind1, UInt_t ind2, std::vector< Double_t > &avsel, std::vector< Double_t > &avrul)
evaluate the average of each variable and f(x) in the given range
UInt_t RiskPerfTst()
Estimates the error rate with the current set of parameters.
Double_t Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
risk assessment
Double_t Optimism()
implementation of eq.
Int_t FindGDTau()
This finds the cutoff parameter tau by scanning several different paths.
virtual ~RuleFitParams()
destructor
RuleFitParams()
constructor
void Init()
Initializes all parameters using the RuleEnsemble and the training tree.
Double_t CalcAverageResponse()
calculate the average response - TODO : rewrite bad dependancy on EvaluateAverage() !
void SetMsgType(EMsgType t)
Double_t Penalty() const
This is the "lasso" penalty To be used for regression.
void FillCoefficients()
helper function to store the rule coefficients in local arrays
Double_t LossFunction(const Event &e) const
Implementation of squared-error ramp loss function (eq 39,40 in ref 1) This is used for binary Classi...
void InitGD()
Initialize GD path search.
Int_t Type(const Event *e) const
void ErrorRateRocTst()
Estimates the error rate with the current set of parameters.
void CalcFStar()
Estimates F* (optimum scoring function) for all events for the given sets.
void MakeTstGradientVector()
make test gradient vector for all tau same algorithm as MakeGradientVector()
Double_t CalcAverageTruth()
calculate the average truth
void UpdateTstCoefficients()
Establish maximum gradient for rules, linear terms and the offset for all taus TODO: do not need inde...
void MakeGradientVector()
make gradient vector
void UpdateCoefficients()
Establish maximum gradient for rules, linear terms and the offset.
void InitNtuple()
initializes the ntuple
Double_t ErrorRateBin()
Estimates the error rate with the current set of parameters It uses a binary estimate of (y-F*(x)) (y...
Double_t ErrorRateReg()
Estimates the error rate with the current set of parameters This code is pretty messy at the moment.
Double_t ErrorRateRoc()
Estimates the error rate with the current set of parameters.
Double_t ErrorRateRocRaw(std::vector< Double_t > &sFsig, std::vector< Double_t > &sFbkg)
Estimates the error rate with the current set of parameters.
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:202
EMsgType
Definition: Types.h:55
@ kDEBUG
Definition: Types.h:56
@ kVERBOSE
Definition: Types.h:57
@ kERROR
Definition: Types.h:60
@ kINFO
Definition: Types.h:58
@ kWARNING
Definition: Types.h:59
@ kFATAL
Definition: Types.h:61
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:798
A TTree represents a columnar dataset.
Definition: TTree.h:79
Double_t y[n]
Definition: legend1.C:17
static constexpr double s
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:148
Bool_t IsNaN(Double_t x)
Definition: TMath.h:842
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:208
Double_t Log(Double_t x)
Definition: TMath.h:710
Double_t Sqrt(Double_t x)
Definition: TMath.h:641
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:176
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
#define rprev(otri1, otri2)
Definition: triangle.c:1025