RuleFitParams.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : RuleFitParams                                                         *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *      Helge Voss         <Helge.Voss@cern.ch>         - MPI-KP Heidelberg, Ger. *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#include "TMVA/RuleFitParams.h"

#include "TMVA/DataSetInfo.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/RuleEnsemble.h"
#include "TMVA/RuleFit.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TTree.h"
#include "TMath.h"

#include <iostream>
#include <iomanip>
#include <numeric>
#include <algorithm>
#include <functional>

Bool_t gFIRSTTST = kTRUE;
Bool_t gFIRSTORG = kTRUE;

Double_t gGDInit     = 0;
Double_t gGDPtr      = 0;
Double_t gGDNorm     = 0;
Double_t gGDEval     = 0;
Double_t gGDEvalRule = 0;
Double_t gGDRuleLoop = 0;
Double_t gGDLinLoop  = 0;

////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::RuleFitParams::RuleFitParams()
   : fRuleFit ( 0 )
   , fRuleEnsemble ( 0 )
   , fNRules ( 0 )
   , fNLinear ( 0 )
   , fPathIdx1 ( 0 )
   , fPathIdx2 ( 0 )
   , fPerfIdx1 ( 0 )
   , fPerfIdx2 ( 0 )
   , fGDNTauTstOK( 0 )
   , fGDNTau ( 51 )
   , fGDTauPrec ( 0.02 )
   , fGDTauScan ( 1000 )
   , fGDTauMin ( 0.0 )
   , fGDTauMax ( 1.0 )
   , fGDTau ( -1.0 )
   , fGDPathStep ( 0.01 )
   , fGDNPathSteps ( 1000 )
   , fGDErrScale ( 1.1 )
   , fAverageTruth( 0 )
   , fFstarMedian ( 0 )
   , fGDNtuple ( 0 )
   , fNTRisk ( 0 )
   , fNTErrorRate ( 0 )
   , fNTNuval ( 0 )
   , fNTCoefRad ( 0 )
   , fNTOffset ( 0 )
   , fNTCoeff ( 0 )
   , fNTLinCoeff ( 0 )
   , fsigave( 0 )
   , fsigrms( 0 )
   , fbkgave( 0 )
   , fbkgrms( 0 )
   , fLogger( new MsgLogger("RuleFit") )
{
   Init();
}
////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::RuleFitParams::~RuleFitParams()
{
   // the coefficient arrays are allocated with new[] in InitNtuple() -> delete[]
   if (fNTCoeff)    { delete [] fNTCoeff;    fNTCoeff = 0; }
   if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = 0; }
   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////
/// Initializes all parameters using the RuleEnsemble and the training tree

void TMVA::RuleFitParams::Init()
{
   if (fRuleFit==0) return;
   if (fRuleFit->GetMethodRuleFit()==0) {
      Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
   }
   UInt_t neve = fRuleFit->GetTrainingEvents().size();
   //
   fNRules  = fRuleEnsemble->GetNRules();
   fNLinear = fRuleEnsemble->GetNLinear();

   //
   // The fraction of events used for validation should be close to unity.
   // Always select from the END.
   //
   UInt_t ofs;
   fPerfIdx1 = 0;
   if (neve>1) {
      fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
   }
   else {
      fPerfIdx2 = 0;
   }
   ofs = neve - fPerfIdx2 - 1;
   fPerfIdx1 += ofs;
   fPerfIdx2 += ofs;
   //
   // The fraction of events used for the path search may be smaller, say 0.5.
   // Always select events from the BEGINNING.
   // This means that the validation and search samples will not overlap if both fractions are <0.5.
   //
   fPathIdx1 = 0;
   if (neve>1) {
      fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
   }
   else {
      fPathIdx2 = 0;
   }
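   //
   // Example (derived from the two blocks above): with neve=1000,
   // GDValidEveFrac=0.5 and GDPathEveFrac=0.5, one gets fPerfIdx = [500,999]
   // (taken from the END) and fPathIdx = [0,499] (taken from the BEGINNING),
   // i.e. disjoint subsamples.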
   //
   // summarize weights
   //
   fNEveEffPath = 0;
   for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
      fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
   }

   fNEveEffPerf = 0;
   for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
      fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
   }
   //
   Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPath << Endl;
   Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPerf << Endl;
   //
   if (fRuleEnsemble->DoRules())
      Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
   else
      Log() << kDEBUG << "Rules are disabled " << Endl;

   if (fRuleEnsemble->DoLinear())
      Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
   else
      Log() << kDEBUG << "Linear terms are disabled " << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// initializes the ntuple

void TMVA::RuleFitParams::InitNtuple()
{
   fGDNtuple= new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
   fGDNtuple->Branch("risk",    &fNTRisk,      "risk/D");
   fGDNtuple->Branch("error",   &fNTErrorRate, "error/D");
   fGDNtuple->Branch("nuval",   &fNTNuval,     "nuval/D");
   fGDNtuple->Branch("coefrad", &fNTCoefRad,   "coefrad/D");
   fGDNtuple->Branch("offset",  &fNTOffset,    "offset/D");
   //
   fNTCoeff    = (fNRules >0 ? new Double_t[fNRules]  : 0);
   fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);

   for (UInt_t i=0; i<fNRules; i++) {
      fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
   }
}

////////////////////////////////////////////////////////////////////////////////
/// evaluate the average of each variable and f(x) in the given range

void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
                                           std::vector<Double_t> &avsel,
                                           std::vector<Double_t> &avrul )
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
   }
   //
   avsel.clear();
   avrul.clear();
   //
   if (fNLinear>0) avsel.resize(fNLinear,0);
   if (fNRules>0)  avrul.resize(fNRules,0);
   const std::vector<UInt_t> *eventRuleMap=0;
   Double_t ew;
   Double_t sumew=0;
   //
   // Loop over events and calculate average of linear terms (normalised) and rule response.
   //
   if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
         }
         // loop over rules
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         for (UInt_t r=0; r<nrules; r++) {
            avrul[(*eventRuleMap)[r]] += ew;
         }
      }
   }
   else { // MakeRuleMap() has not yet been called
      const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         // first cache rule/lin response
         /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
         /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
         // loop over linear terms
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
         }
         // loop over rules
         for (UInt_t r=0; r<fNRules; r++) {
            avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
         }
      }
   }
   // average variable
   for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
      avsel[sel] = avsel[sel] / sumew;
   }
   // average rule response, excl coeff
   for (UInt_t r=0; r<fNRules; r++) {
      avrul[r] = avrul[r] / sumew;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).
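///
/// In formula form (a transcription of the code below):
/// \f$ L(y,F) = w \left( y - \max(-1,\min(1,F(x))) \right)^2 \f$,
/// i.e. the model response is clamped ("ramped") to [-1,1] before the
/// weighted squared error is taken.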

Double_t TMVA::RuleFitParams::LossFunction( const Event& e ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
   //
   return diff*diff*e.GetWeight();
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) const
{
   Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment
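///
/// Computed below as the weighted average loss over the event range:
/// \f$ R = \frac{1}{N_{eff}} \sum_{i=ind1}^{ind2} L_i \f$,
/// where \f$ N_{eff} \f$ = neff is the effective (weighted) number of events.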

Double_t TMVA::RuleFitParams::Risk(UInt_t ind1, UInt_t ind2, Double_t neff) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment for tau model <itau>

Double_t TMVA::RuleFitParams::Risk(UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i,itau);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// This is the "lasso" penalty
/// To be used for regression.
/// --- NOT USED ---
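///
/// The returned value is the L1 norm of all coefficients, as coded below:
/// \f$ P = \sum_r |a_r| + \sum_l |b_l| \f$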

Double_t TMVA::RuleFitParams::Penalty() const
{
   Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
   Double_t rval=0;
   const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
   for (UInt_t i=0; i<fNRules; i++) {
      rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      rval += TMath::Abs((*lincoeff)[i]);
   }
   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// Initialize GD path search

void TMVA::RuleFitParams::InitGD()
{
   if (fGDNTau<2) {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   if (fGDTau<0.0) {
      // fGDNTau = 50; already set in MethodRuleFit
      fGDTauScan = 1000;
      fGDTauMin  = 0.0;
      fGDTauMax  = 1.0;
   }
   else {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   // set all taus
   fGDTauVec.clear();
   fGDTauVec.resize( fGDNTau );
   if (fGDNTau==1) {
      fGDTauVec[0] = fGDTau;
   }
   else {
      // set tau vector - TODO: make a smarter choice of range in tau
      Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
         if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
      }
   }
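   // Example: with the defaults fGDNTau=51, fGDTauMin=0 and fGDTauMax=1,
   // dtau = 1/50 = 0.02 and the grid becomes tau = 0.00, 0.02, ..., 1.00.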
   // initialize path search vectors

   fGradVec.clear();
   fGradVecLin.clear();
   fGradVecTst.clear();
   fGradVecLinTst.clear();
   fGDErrTst.clear();
   fGDErrTstOK.clear();
   fGDOfsTst.clear();
   fGDCoefTst.clear();
   fGDCoefLinTst.clear();
   //
   // rules
   //
   fGDCoefTst.resize(fGDNTau);
   fGradVec.resize(fNRules,0);
   fGradVecTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecTst[i].resize(fNRules,0);
      fGDCoefTst[i].resize(fNRules,0);
   }
   //
   // linear terms
   //
   fGDCoefLinTst.resize(fGDNTau);
   fGradVecLin.resize(fNLinear,0);
   fGradVecLinTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecLinTst[i].resize(fNLinear,0);
      fGDCoefLinTst[i].resize(fNLinear,0);
   }
   //
   // error, coefs etc
   //
   fGDErrTst.resize(fGDNTau,0);
   fGDErrTstOK.resize(fGDNTau,kTRUE);
   fGDOfsTst.resize(fGDNTau,0);
   fGDNTauTstOK = fGDNTau;
   //
   // calculate average selectors and rule responses for the path sample size
   //
}

////////////////////////////////////////////////////////////////////////////////
/// This finds the cutoff parameter tau by scanning several different paths

Int_t TMVA::RuleFitParams::FindGDTau()
{
   if (fGDNTau<2) return 0;
   if (fGDTauScan==0) return 0;

   if (fGDOfsTst.size()<1)
      Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
   //
   Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
   //
   // Find how many points to scan and how often to calculate the error
   UInt_t nscan = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
   UInt_t netst = std::min(nscan,UInt_t(100));
   UInt_t nscanned=0;
   //
   //--------------------
   // loop over the paths
   //--------------------
   // The MAXIMUM number of loops is given by nscan.
   // At each loop, the paths that are far away from the minimum
   // are rejected. Hence at each check (every netst events), the number
   // of paths searched will be reduced.
   // The maximum 'distance' from the minimum error rate is
   // 1 sigma. See RiskPerfTst() for details.
   //
   Bool_t doloop=kTRUE;
   UInt_t ip=0;
   UInt_t itauMin=0;
   Timer timer( nscan, "RuleFit" );
   while (doloop) {
      // make gradvec
      MakeTstGradientVector();
      // update coefs
      UpdateTstCoefficients();
      // estimate error and do the sum
      // do this at index=0, netst-1, 2*netst-1 ...
      nscanned++;
      if ( (ip==0) || ((ip+1)%netst==0) ) {
         // ErrorRateRocTst( );
         itauMin = RiskPerfTst();
         Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
               << " => error rate = " << fGDErrTst[itauMin] << Endl;
      }
      ip++;
      doloop = ((ip<nscan) && (fGDNTauTstOK>3));
      gFIRSTTST = kFALSE;
      if (Log().GetMinType()>kVERBOSE)
         timer.DrawProgressBar(ip);
   }
   //
   // Set tau and coefs
   // Downscale tau slightly in order to avoid numerical problems
   //
   if (nscanned==0) {
      Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
   }
   fGDTau = fGDTauVec[itauMin];
   fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
   fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
   fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
   Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
         << " after " << timer.GetElapsedTime() << " " << Endl;

   return nscan;
}

////////////////////////////////////////////////////////////////////////////////
/// The following finds the gradient directed path in parameter space.
/// More work is needed... FT, 24/9/2006
/// The algorithm is currently as follows:
/// <***if not otherwise stated, the sample used below is [fPathIdx1,fPathIdx2]***>
/// 1. Set offset to -average(y(true)) and all coefs=0 => average of F(x)==0
/// 2. FindGDTau() : start scanning using several paths defined by different tau;
///    choose the tau yielding the best path
/// 3. start scanning the chosen path
/// 4. check error rate at a given frequency;
///    data used for check: [fPerfIdx1,fPerfIdx2]
/// 5. stop when either of the following conditions is fulfilled:
///    a. loop index==fGDNPathSteps
///    b. error > fGDErrScale*errmin
///    c. only in DEBUG mode: risk is not monotonically decreasing
///
/// The algorithm will warn if:
/// I.   the error rate was still decreasing when the loop finished -> increase fGDNPathSteps!
/// II.  the minimum was found at an early stage -> decrease fGDPathStep
/// III. DEBUG: risk > previous risk -> entered chaotic region (regularization is too small)
///
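/// A compact sketch of one path step (see MakeGradientVector() and
/// UpdateCoefficients() for the actual code): compute the risk gradient g,
/// take gmax = max_k |g_k|, and for every coefficient with |g_k| >= tau*gmax
/// update a_k += fGDPathStep*g_k. Loosely speaking, a small tau moves many
/// coefficients per step (ridge-like), while tau close to 1 moves only the
/// steepest ones (lasso-like).
///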

void TMVA::RuleFitParams::MakeGDPath()
{
   Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
         << Endl;
   Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
   Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;

   // check if debug mode
   const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
   const Bool_t isDebug   = (Log().GetMinType()<=kDEBUG);

   // init GD parameters and clear coeff vectors
   InitGD();

   // evaluate average response of rules/linear terms (with event weights)
   EvaluateAveragePath();
   EvaluateAveragePerf();

   // initial estimate; all other a(i) are zero
   Log() << kVERBOSE << "Creating GD path"  << Endl;
   Log() << kVERBOSE << "  N(steps)     = " << fGDNPathSteps << Endl;
   Log() << kVERBOSE << "  step         = " << fGDPathStep << Endl;
   Log() << kVERBOSE << "  N(tau)       = " << fGDNTau << Endl;
   Log() << kVERBOSE << "  N(tau steps) = " << fGDTauScan << Endl;
   Log() << kVERBOSE << "  tau range    = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;

   // init ntuple
   if (isDebug) InitNtuple();

   // DEBUG: risk scan
   Int_t nbadrisk=0;    // number of points where risk(i+1)>risk(i)
   Double_t trisk=0;    // time per risk evaluation
   Double_t strisk=0;   // total time
   Double_t rprev=1e32; // previous risk

   // parameters set at point with min error
   Double_t errmin=1e32; // min error
   // Double_t riskMin=0; // risk
   Int_t indMin=-1;      // index
   std::vector<Double_t> coefsMin;    // rule coefs
   std::vector<Double_t> lincoefsMin; // linear coefs
   Double_t offsetMin;   // offset


   // DEBUG: timing
   clock_t t0=0;
   Double_t tgradvec;
   Double_t tupgrade;
   Double_t tperf;
   Double_t stgradvec=0;
   Double_t stupgrade=0;
   Double_t stperf=0;

   // linear regression to estimate slope of error rate evolution
   const UInt_t npreg=5;
   std::vector<Double_t> valx;
   std::vector<Double_t> valy;
   std::vector<Double_t> valxy;

   // loop related
   Bool_t docheck;         // true if an error rate check is to be done
   Int_t iloop=0;          // loop index
   Bool_t found=kFALSE;    // true if minimum is found
   Bool_t riskFlat=kFALSE; // DEBUG: flag is true if risk evolution behaves badly
   Bool_t done = kFALSE;   // flag that the scan is done

   // calculate how often to check error rate
   int imod = fGDNPathSteps/100;
   if (imod<100) imod = std::min(100,fGDNPathSteps);
   if (imod>100) imod=100;
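   // Net effect of the three lines above: imod = min(100, fGDNPathSteps),
   // i.e. the error rate is checked at most every 100 steps.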

   // reset coefficients
   fAverageTruth = -CalcAverageTruth();
   offsetMin     = fAverageTruth;
   fRuleEnsemble->SetOffset(offsetMin);
   fRuleEnsemble->ClearCoefficients(0);
   fRuleEnsemble->ClearLinCoefficients(0);
   for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
      fGDOfsTst[i] = offsetMin;
   }
   Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

   // find the best tau - returns the number of steps performed in scan
   Int_t nprescan = FindGDTau();

   //
   //
   // calculate F*
   //
   // CalcFStar(fPerfIdx1, fPerfIdx2);
   //

   // set some ntuple values
   fNTRisk = rprev;
   fNTCoefRad = -1.0;
   fNTErrorRate = 0;

   // a local flag indicating for what reason the search was stopped
   Int_t stopCondition=0;

   Log() << kINFO << "Fitting model..." << Endl;
   // start loop with timer
   Timer timer( fGDNPathSteps, "RuleFit" );
   Log() << kWARNING;
   while (!done) {
      // Make gradient vector (eq 44, ref 1)
      if (isVerbose) t0 = clock();
      MakeGradientVector();
      if (isVerbose) {
         tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stgradvec += tgradvec;
      }

      // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
      if (isVerbose) t0 = clock();
      UpdateCoefficients();
      if (isVerbose) {
         tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stupgrade += tupgrade;
      }

      // don't check error rate every loop
      docheck = ((iloop==0) ||((iloop+1)%imod==0));

      if (docheck) {
         // draw progressbar only if not debug
         if (!isVerbose)
            timer.DrawProgressBar(iloop);
         fNTNuval = Double_t(iloop)*fGDPathStep;
         fNTRisk = 0.0;

         // check risk evolution

         if (isDebug) FillCoefficients();
         fNTCoefRad = fRuleEnsemble->CoefficientRadius();

         // calculate risk
         t0 = clock();
         fNTRisk = RiskPath();
         trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         strisk += trisk;
         //
         // Check for an increase in risk.
         // Such an increase would imply that the regularization is too small.
         // Stop the iteration if this happens.
         //
         if (fNTRisk>=rprev) {
            if (fNTRisk>rprev) {
               nbadrisk++;
               Log() << "Risk(i+1)>=Risk(i) in path" << Endl;
               riskFlat=(nbadrisk>3);
               if (riskFlat) {
                  Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
                  Log() << "--- STOPPING MINIMISATION ---" << Endl;
                  Log() << "This may be OK if the minimum is already found" << Endl;
               }
            }
         }
         rprev = fNTRisk;
         //
         // Estimate the error rate using cross validation.
         // Well, not quite full cross validation since we only
         // use ONE model.
         //
         if (isVerbose) t0 = clock();
         fNTErrorRate = 0;

         // Check error rate
         Double_t errroc;//= ErrorRateRoc();
         Double_t riskPerf = RiskPerf();
         // Double_t optimism = Optimism();
         //
         errroc = riskPerf;
         //
         fNTErrorRate = errroc;
         //
         if (isVerbose) {
            tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
            stperf +=tperf;
         }
         //
         // Always take the last min.
         // For each step the risk is reduced.
         //
         if (fNTErrorRate<=errmin) {
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
         //
         // check slope of last couple of points
         //
         if (valx.size()==npreg) {
            valx.erase(valx.begin());
            valy.erase(valy.begin());
            valxy.erase(valxy.begin());
         }
         valx.push_back(fNTNuval);
         valy.push_back(fNTErrorRate);
         valxy.push_back(fNTErrorRate*fNTNuval);

         gFIRSTORG = kFALSE;

         //
         if (isDebug) fGDNtuple->Fill();
         if (isVerbose) {
            Log() << kVERBOSE << "ParamsIRE : "
                  << std::setw(10)
                  << Form("%8d",iloop+1) << " "
                  << Form("%4.4f",fNTRisk) << " "
                  << Form("%4.4f",riskPerf) << " "
                  << Form("%4.4f",fNTRisk+riskPerf) << " "
                  // << Form("%4.4f",fsigave+fbkgave) << " "
                  // << Form("%4.4f",fsigave) << " "
                  // << Form("%4.4f",fsigrms) << " "
                  // << Form("%4.4f",fbkgave) << " "
                  // << Form("%4.4f",fbkgrms) << " "
                  // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
                  << Endl;
         }
      }
      iloop++;
      // Stop iteration under various conditions
      // * The condition R(i+1)<R(i) is no longer true (when the implicit regularization is too weak)
      // * If the current error estimate is > factor*errmin (factor = 1.1)
      // * We have reached the last step...
      Bool_t endOfLoop = (iloop==fGDNPathSteps);
      if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
         if (riskFlat) {
            stopCondition = 1;
         }
         else if (endOfLoop) {
            stopCondition = 2;
         }
         if (indMin<0) {
            Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless;)" << Endl;
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         found = kTRUE;
      }
      done = (found);
   }
   Log() << Endl;
   Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << " " << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
   Log() << kINFO << "Reason for ending loop: ";
   switch (stopCondition) {
   case 0:
      Log() << kINFO << "clear minimum found";
      break;
   case 1:
      Log() << kINFO << "chaotic behaviour of risk";
      break;
   case 2:
      Log() << kINFO << "end of loop reached";
      break;
   default:
      Log() << kINFO << "unknown!";
      break;
   }
   Log() << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;

   // check if early minima - might be an indication of too large stepsize
   if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
      Log() << kWARNING << "Reached minimum early in the search" << Endl;
      Log() << "Check results and maybe decrease GDStep size" << Endl;
   }
   //
   // quick check of the sign of the slope for the last npreg points
   //
   Double_t sumx  = std::accumulate( valx.begin(), valx.end(), Double_t() );
   Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
   Double_t sumy  = std::accumulate( valy.begin(), valy.end(), Double_t() );
   Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
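   // Note: "slope" is N*sum(x*y) - sum(x)*sum(y), the (unnormalised)
   // numerator of the least-squares slope over the last npreg points;
   // only its sign is used below, so the normalisation does not matter.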
   if (slope<0) {
      Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
      Log() << kINFO << "Increase the number of steps (GDNSteps)." << Endl;
   }
   //
   // set coefficients
   //
   if (found) {
      fRuleEnsemble->SetCoefficients( coefsMin );
      fRuleEnsemble->SetLinCoefficients( lincoefsMin );
      fRuleEnsemble->SetOffset( offsetMin );
   }
   else {
      Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
   }

   //
   // print timing info (VERBOSE mode)
   //
   if (isVerbose) {
      Double_t stloop = strisk + stupgrade + stgradvec + stperf;
      Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
      Log() << kVERBOSE << "   gradvec = " << 1000*stgradvec/iloop << Endl;
      Log() << kVERBOSE << "   upgrade = " << 1000*stupgrade/iloop << Endl;
      Log() << kVERBOSE << "   risk    = " << 1000*strisk/iloop << Endl;
      Log() << kVERBOSE << "   perf    = " << 1000*stperf/iloop << Endl;
      Log() << kVERBOSE << "   loop    = " << 1000*stloop/iloop << Endl;
      //
      Log() << kVERBOSE << "   GDInit     = " << 1000*gGDInit/iloop << Endl;
      Log() << kVERBOSE << "   GDPtr      = " << 1000*gGDPtr/iloop << Endl;
      Log() << kVERBOSE << "   GDEval     = " << 1000*gGDEval/iloop << Endl;
      Log() << kVERBOSE << "   GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
      Log() << kVERBOSE << "   GDNorm     = " << 1000*gGDNorm/iloop << Endl;
      Log() << kVERBOSE << "   GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
      Log() << kVERBOSE << "   GDLinLoop  = " << 1000*gGDLinLoop/iloop << Endl;
   }
   //
   // write ntuple (DEBUG)
   if (isDebug) fGDNtuple->Write();
}

////////////////////////////////////////////////////////////////////////////////
/// helper function to store the rule coefficients in local arrays

void TMVA::RuleFitParams::FillCoefficients()
{
   fNTOffset = fRuleEnsemble->GetOffset();
   //
   for (UInt_t i=0; i<fNRules; i++) {
      fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates F* (optimum scoring function) for all events for the given sets.
/// The result is used in ErrorRateReg().
/// --- NOT USED ---
///

void TMVA::RuleFitParams::CalcFStar()
{
   Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   fFstar.clear();
   std::vector<Double_t> fstarSorted;
   Double_t fstarVal;
   // loop over all events and estimate F* for each event
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      fstarVal = fRuleEnsemble->FStar(e);
      fFstar.push_back(fstarVal);
      fstarSorted.push_back(fstarVal);
      if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
   }
   // sort F* and find the median
   // (the odd/even branches were swapped in the original; fixed here)
   std::sort( fstarSorted.begin(), fstarSorted.end() );
   UInt_t ind = neve/2;
   if (neve&1) { // odd number of events - take the central value
      fFstarMedian = fstarSorted[ind];
   }
   else { // even - average the two central values
      fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// implementation of eq. 7.17 in Hastie, Tibshirani & Friedman book;
/// this is the covariance between the estimated response yhat and the
/// true value y.
/// NOT REALLY SURE IF THIS IS CORRECT!
/// --- THIS IS NOT USED ---
///
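/// In formula form (a transcription of the code below; weights are
/// normalised so that \f$\sum_i w_i = 1\f$):
/// \f$ \omega = 2\,\frac{\overline{\hat{y}y} - \bar{\hat{y}}\,\bar{y}}{1 - \sum_i w_i^2} \f$
///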

Double_t TMVA::RuleFitParams::Optimism()
{
   Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumy=0;
   Double_t sumyhat=0;
   Double_t sumyhaty=0;
   Double_t sumw2=0;
   Double_t sumw=0;
   Double_t yhat;
   Double_t y;
   Double_t w;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      yhat = fRuleEnsemble->EvalEvent(i); // evaluated using the model
      y    = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
      w    = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf; // the weight, reweighted such that sum=1
      sumy     += w*y;
      sumyhat  += w*yhat;
      sumyhaty += w*yhat*y;
      sumw2    += w*w;
      sumw     += w;
   }
   Double_t div = 1.0-sumw2;
   Double_t cov = sumyhaty - sumyhat*sumy;
   return 2.0*cov/div;
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// This code is pretty messy at the moment.
/// Cleanup is needed.
/// --- NOT USED ---
///

Double_t TMVA::RuleFitParams::ErrorRateReg()
{
   Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
   }
   if (fFstar.size()!=neve) {
      Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
            << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
   }
   //
   Double_t sF;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdf = 0;
   Double_t sumdfmed = 0;
   //
   // A bit messy here.
   // I believe the binary error classification is appropriate here.
   // The problem is stability.
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // scaled abs error, eq 20 in RuleFit paper
      sumdf    += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
      sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
   }
   // scaled abs error, eq 20
   // This error (df) is large - need to think about how to compensate...
   //
   return sumdf/sumdfmed;
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It uses a binary estimate of (y-F*(x)):
/// (y-F*(x)) = (Num of events where sign(F)!=sign(y))/Neve
/// y = {+1 if event is signal, -1 otherwise}
/// --- NOT USED ---
///

Double_t TMVA::RuleFitParams::ErrorRateBin()
{
   Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdfbin = 0;
   Double_t dneve = Double_t(neve);
   Int_t signF, signy;
   Double_t sF;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
      signF = (sF>0 ? +1:-1);
      // signy = (sFStar>0 ? +1:-1);
      signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
      sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
   }
   Double_t f = sumdfbin/dneve;
   // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
   return f;
}

////////////////////////////////////////////////////////////////////////////////

Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
                                               std::vector<Double_t> & sFbkg )
{
   //
   // Estimates the error rate with the current set of parameters.
   // It calculates the area under the bkg rejection vs signal efficiency curve.
   // The value returned is 1-area.
   //
   std::sort(sFsig.begin(), sFsig.end());
   std::sort(sFbkg.begin(), sFbkg.end());
   const Double_t minsig = sFsig.front();
   const Double_t minbkg = sFbkg.front();
   const Double_t maxsig = sFsig.back();
   const Double_t maxbkg = sFbkg.back();
   const Double_t minf = std::min(minsig,minbkg);
   const Double_t maxf = std::max(maxsig,maxbkg);
   const Int_t    nsig = Int_t(sFsig.size());
   const Int_t    nbkg = Int_t(sFbkg.size());
   const Int_t    np   = std::min((nsig+nbkg)/4,50);
   const Double_t df   = (maxf-minf)/(np-1);
   //
   // calculate area under rejection/efficiency curve
   //
   Double_t fcut;
   std::vector<Double_t>::const_iterator indit;
   Int_t nrbkg;
   Int_t nesig;
   Int_t pnesig=0;
   Double_t rejb=0;
   Double_t effs=1.0;
   Double_t prejb=0;
   Double_t peffs=1.0;
   // Double_t drejb;
   Double_t deffs;
   Double_t area=0;
   Int_t npok=0;
   //
   // loop over range of F [minf,maxf]
   //
   for (Int_t i=0; i<np; i++) {
      fcut = minf + df*Double_t(i);
      indit = std::find_if( sFsig.begin(), sFsig.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
      nesig = sFsig.end()-indit; // number of sig accepted with F>cut
      if (TMath::Abs(pnesig-nesig)>0) {
         npok++;
         indit = std::find_if( sFbkg.begin(), sFbkg.end(), std::bind2nd(std::greater_equal<Double_t>(), fcut));
         nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
         rejb = Double_t(nrbkg)/Double_t(nbkg);
         effs = Double_t(nesig)/Double_t(nsig);
         //
         // drejb = rejb-prejb;
         deffs = effs-peffs;
         area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
         prejb = rejb;
         peffs = effs;
      }
      pnesig = nesig;
   }
   area += 0.5*(1+rejb)*effs; // extrapolate to the end point

   return (1.0-area);
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
/// This works but is less efficient than calculating the Risk using RiskPerf().
///

Double_t TMVA::RuleFitParams::ErrorRateRoc()
{
   Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sF;
   //
   std::vector<Double_t> sFsig;
   std::vector<Double_t> sFbkg;
   Double_t sumfsig=0;
   Double_t sumfbkg=0;
   Double_t sumf2sig=0;
   Double_t sumf2bkg=0;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent(i);// * fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
         sFsig.push_back(sF);
         sumfsig  +=sF;
         sumf2sig +=sF*sF;
      }
      else {
         sFbkg.push_back(sF);
         sumfbkg  +=sF;
         sumf2bkg +=sF*sF;
      }
   }
   fsigave = sumfsig/sFsig.size();
   fbkgave = sumfbkg/sFbkg.size();
   fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
   fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
   //
   return ErrorRateRocRaw( sFsig, sFbkg );
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
///
/// See comment under ErrorRateRoc().
///

void TMVA::RuleFitParams::ErrorRateRocTst()
{
   Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   // std::vector<Double_t> sF;
   Double_t sF;
   std::vector< std::vector<Double_t> > sFsig;
   std::vector< std::vector<Double_t> > sFbkg;
   //
   sFsig.resize( fGDNTau );
   sFbkg.resize( fGDNTau );
   // sF.resize( fGDNTau );

   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
            sFsig[itau].push_back(sF);
         }
         else {
            sFbkg[itau].push_back(sF);
         }
      }
   }
   Double_t err;

   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
      fGDErrTst[itau] = err;
   }
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters
/// using the <Perf> subsample.
/// Returns the tau index giving the lowest error.
///

UInt_t TMVA::RuleFitParams::RiskPerfTst()
{
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<RiskPerfTst> Invalid start/end indices!" << Endl;
      return 0;
   }
   //
   Double_t sumx  = 0;
   Double_t sumx2 = 0;
   Double_t maxx  = -100.0;
   Double_t minx  = 1e30;
   UInt_t itaumin = 0;
   UInt_t nok=0;
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         nok++;
         fGDErrTst[itau] = RiskPerf(itau);
         sumx  += fGDErrTst[itau];
         sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
         if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
         if (fGDErrTst[itau]<minx) {
            minx=fGDErrTst[itau];
            itaumin = itau;
         }
      }
   }
   Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
   Double_t maxacc = minx+sigx;
   //
   if (nok>0) {
      nok = 0;
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         if (fGDErrTstOK[itau]) {
            if (fGDErrTst[itau] > maxacc) {
               fGDErrTstOK[itau] = kFALSE;
            }
            else {
               nok++;
            }
         }
      }
   }
   fGDNTauTstOK = nok;
   Log() << kVERBOSE << "TAU: "
         << itaumin << " "
         << nok << " "
         << minx << " "
         << maxx << " "
         << sigx << Endl;
   //
   return itaumin;
}

////////////////////////////////////////////////////////////////////////////////
/// make test gradient vector for all tau;
/// same algorithm as MakeGradientVector()
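///
/// The per-coefficient gradient used here (our reading of eq 35 in ref 1):
/// \f$ g_k = \frac{2}{N_{eff}} \sum_{i\,:\,|F(x_i)|<1} w_i\,(y_i - F(x_i))\,f_k(x_i) \f$,
/// where \f$ f_k(x_i) \f$ is the response of rule/linear term k for event i.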

void TMVA::RuleFitParams::MakeTstGradientVector()
{
   UInt_t neve = fPathIdx2-fPathIdx1+1; // sign fixed; was fPathIdx1-fPathIdx2+1, which underflows
   if (neve<1) {
      Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         for (UInt_t ir=0; ir<fNRules; ir++) {
            fGradVecTst[itau][ir]=0;
         }
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLinTst[itau][il]=0;
         }
      }
   }
   //
   // Double_t val; // temp store
   Double_t sF; // score function value
   Double_t r;  // eq 35, ref 1
   Double_t y;  // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   // Loop over all events
   //
   UInt_t nsfok=0;
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];
      UInt_t nrules=0;
      if (fRuleEnsemble->DoRules()) {
         eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
         nrules = (*eventRuleMap).size();
      }
      for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fGDErrTstOK[itau]) {
            sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
            if (TMath::Abs(sF)<1.0) {
               nsfok++;
               r = 0;
               y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
               r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
               // rule gradient vector
               for (UInt_t ir=0; ir<nrules; ir++) {
                  rind = (*eventRuleMap)[ir];
                  fGradVecTst[itau][rind] += r;
               }
               // linear terms
               for (UInt_t il=0; il<fNLinear; il++) {
                  fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
               }
            } // if (TMath::Abs(F)<xxx)
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
/// for all taus. TODO: do not need index range!
///

void TMVA::RuleFitParams::UpdateTstCoefficients()
{
   Double_t maxr, maxl, cthresh, val;
   // loop over all taus
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         // find max gradient
         maxr = ( (fNRules>0 ?
                   TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
         maxl = ( (fNLinear>0 ?
                   TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );

         // Use the maximum as a threshold
         Double_t maxv = (maxr>maxl ? maxr:maxl);
         cthresh = maxv * fGDTauVec[itau];

         // Add to offset, if gradient is large enough:
         // Loop over the gradient vector and move to next set of coefficients
         // size of GradVec (and GradVecLin) should be 0 if learner is disabled
         //
         // Step-size is divided by 10 when looking for the best path.
         //
         if (maxv>0) {
            const Double_t stepScale=1.0;
            for (UInt_t i=0; i<fNRules; i++) {
               val = fGradVecTst[itau][i];

               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
               }
            }
            // Loop over the gradient vector for the linear part and move to next set of coefficients
            for (UInt_t i=0; i<fNLinear; i++) {
               val = fGradVecLinTst[itau][i];
               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
               }
            }
         }
      }
   }
   // set the offset - should be outside the itau loop!
   CalcTstAverageResponse();
}

////////////////////////////////////////////////////////////////////////////////
/// make gradient vector
///

void TMVA::RuleFitParams::MakeGradientVector()
{
   clock_t t0;
   // clock_t t10;
   t0 = clock();
   //
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t ir=0; ir<fNRules; ir++) {
      fGradVec[ir]=0;
   }
   for (UInt_t il=0; il<fNLinear; il++) {
      fGradVecLin[il]=0;
   }
   //
   // Double_t val; // temp store
   Double_t sF; // score function value
   Double_t r;  // eq 35, ref 1
   Double_t y;  // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;

   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];

      // t0 = clock(); //DEB
      sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
      // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      if (TMath::Abs(sF)<1.0) {
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
         r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
         // rule gradient vector
         for (UInt_t ir=0; ir<nrules; ir++) {
            rind = (*eventRuleMap)[ir];
            fGradVec[rind] += r;
         }
         // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
         // linear terms
         // t0 = clock(); //DEB
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
         }
         // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      } // if (TMath::Abs(F)<xxx)
   }
}


////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
///

void TMVA::RuleFitParams::UpdateCoefficients()
{
   Double_t maxr = ( (fRuleEnsemble->DoRules() ?
                      TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
   Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
                      TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
   // Use the maximum as a threshold
   Double_t maxv = (maxr>maxl ? maxr:maxl);
   Double_t cthresh = maxv * fGDTau;

   Double_t useRThresh;
   Double_t useLThresh;
   //
   // Choose thresholds.
   //
   useRThresh = cthresh;
   useLThresh = cthresh;

   Double_t gval, lval, coef, lcoef;

   // Add to offset, if gradient is large enough:
   // Loop over the gradient vector and move to next set of coefficients
   // size of GradVec (and GradVecLin) should be 0 if learner is disabled
   if (maxv>0) {
      for (UInt_t i=0; i<fGradVec.size(); i++) {
         gval = fGradVec[i];
         if (TMath::Abs(gval)>=useRThresh) {
            coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
            fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
         }
      }

      // Loop over the gradient vector for the linear part and move to next set of coefficients
      for (UInt_t i=0; i<fGradVecLin.size(); i++) {
         lval = fGradVecLin[i];
         if (TMath::Abs(lval)>=useLThresh) {
            lcoef = fRuleEnsemble->GetLinCoefficients(i) + fGDPathStep*lval;
            fRuleEnsemble->SetLinCoefficient(i,lcoef);
         }
      }
      // Set the offset
      Double_t offset = CalcAverageResponse();
      fRuleEnsemble->SetOffset( offset );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// calc average response for all test paths - TODO: see comment under CalcAverageResponse();
/// note that 0 offset is used
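///
/// In formula form: \f$ ofs_\tau = -\sum_s b_s^\tau \bar{x}_s - \sum_r a_r^\tau \bar{r}_r \f$,
/// i.e. the offset that makes the average model response vanish on the
/// path sample.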

void TMVA::RuleFitParams::CalcTstAverageResponse()
{
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         fGDOfsTst[itau] = 0;
         for (UInt_t s=0; s<fNLinear; s++) {
            fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
         }
         for (UInt_t r=0; r<fNRules; r++) {
            fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
         }
      }
   }
   //
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average response - TODO: rewrite bad dependency on EvaluateAverage()!
///
/// note that 0 offset is used

Double_t TMVA::RuleFitParams::CalcAverageResponse()
{
   Double_t ofs = 0;
   for (UInt_t s=0; s<fNLinear; s++) {
      ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
   }
   for (UInt_t r=0; r<fNRules; r++) {
      ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
   }
   return ofs;
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average truth
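///
/// In formula form: \f$ \bar{y} = \frac{1}{N_{eff}} \sum_i w_i\,y_i \f$,
/// with \f$ y_i = +1 \f$ for signal and \f$ y_i = -1 \f$ for background.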

Double_t TMVA::RuleFitParams::CalcAverageTruth()
{
   if (fPathIdx2<=fPathIdx1) {
      Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
      return 0;
   }
   Double_t sum=0;
   Double_t ensig=0;
   Double_t enbkg=0;
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      Double_t ew = fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
      else                                                                 enbkg += ew;
      sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
   }
   Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;

   return sum/fNEveEffPath;
}

//_______________________________________________________________________

Int_t TMVA::RuleFitParams::Type( const Event * e ) const {
   return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
}


////////////////////////////////////////////////////////////////////////////////

void TMVA::RuleFitParams::SetMsgType( EMsgType t ) {
   fLogger->SetMinType(t);
}