RuleFitParams.cxx

// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : RuleFitParams                                                         *
 * Web    : http://tmva.sourceforge.net                                          *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA      *
 *      Helge Voss         <Helge.Voss@cern.ch>         - MPI-KP Heidelberg, Ger. *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      Iowa State U.                                                             *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                         *
 **********************************************************************************/

/*! \class TMVA::RuleFitParams
\ingroup TMVA
A class doing the actual fitting of a linear model using rules as base functions.
*/
#include "TMVA/RuleFitParams.h"

#include "TMVA/DataSetInfo.h"
#include "TMVA/MethodRuleFit.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/RuleEnsemble.h"
#include "TMVA/RuleFit.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TTree.h"
#include "TMath.h"

#include <iomanip>
#include <numeric>
#include <algorithm>
#include <functional>

Bool_t gFIRSTTST = kTRUE;
Bool_t gFIRSTORG = kTRUE;

Double_t gGDInit     = 0;
Double_t gGDPtr      = 0;
Double_t gGDNorm     = 0;
Double_t gGDEval     = 0;
Double_t gGDEvalRule = 0;
Double_t gGDRuleLoop = 0;
Double_t gGDLinLoop  = 0;

////////////////////////////////////////////////////////////////////////////////
/// constructor

TMVA::RuleFitParams::RuleFitParams()
   : fRuleFit ( 0 )
   , fRuleEnsemble ( 0 )
   , fNRules ( 0 )
   , fNLinear ( 0 )
   , fPathIdx1 ( 0 )
   , fPathIdx2 ( 0 )
   , fPerfIdx1 ( 0 )
   , fPerfIdx2 ( 0 )
   , fGDNTauTstOK( 0 )
   , fGDNTau ( 51 )
   , fGDTauPrec ( 0.02 )
   , fGDTauScan ( 1000 )
   , fGDTauMin ( 0.0 )
   , fGDTauMax ( 1.0 )
   , fGDTau ( -1.0 )
   , fGDPathStep ( 0.01 )
   , fGDNPathSteps ( 1000 )
   , fGDErrScale ( 1.1 )
   , fAverageTruth( 0 )
   , fFstarMedian ( 0 )
   , fGDNtuple ( 0 )
   , fNTRisk ( 0 )
   , fNTErrorRate ( 0 )
   , fNTNuval ( 0 )
   , fNTCoefRad ( 0 )
   , fNTOffset ( 0 )
   , fNTCoeff ( 0 )
   , fNTLinCoeff ( 0 )
   , fsigave( 0 )
   , fsigrms( 0 )
   , fbkgave( 0 )
   , fbkgrms( 0 )
   , fLogger( new MsgLogger("RuleFit") )
{
   Init();
}
////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::RuleFitParams::~RuleFitParams()
{
   if (fNTCoeff)    { delete [] fNTCoeff;    fNTCoeff    = nullptr; }
   if (fNTLinCoeff) { delete [] fNTLinCoeff; fNTLinCoeff = nullptr; }
   delete fLogger;
}

////////////////////////////////////////////////////////////////////////////////
/// Initializes all parameters using the RuleEnsemble and the training tree

void TMVA::RuleFitParams::Init()
{
   if (fRuleFit==0) return;
   if (fRuleFit->GetMethodRuleFit()==0) {
      Log() << kFATAL << "RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
   }
   UInt_t neve = fRuleFit->GetTrainingEvents().size();
   //
   fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
   fNRules       = fRuleEnsemble->GetNRules();
   fNLinear      = fRuleEnsemble->GetNLinear();

   //
   // The fraction of events used for validation should be close to unity.
   // Always select from the END.
   //
   UInt_t ofs;
   fPerfIdx1 = 0;
   if (neve>1) {
      fPerfIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
   }
   else {
      fPerfIdx2 = 0;
   }
   ofs = neve - fPerfIdx2 - 1;
   fPerfIdx1 += ofs;
   fPerfIdx2 += ofs;
   //
   // The fraction of events used for the path search may be smaller, say 0.5.
   // Always select events from the BEGINNING.
   // This means that the validation and search samples will not overlap if both fractions are <0.5.
   //
   fPathIdx1 = 0;
   if (neve>1) {
      fPathIdx2 = static_cast<UInt_t>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
   }
   else {
      fPathIdx2 = 0;
   }
   //
   // summarize weights
   //
   fNEveEffPath = 0;
   for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
      fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
   }

   fNEveEffPerf = 0;
   for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
      fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
   }
   //
   Log() << kVERBOSE << "Path constr. - event index range = [ " << fPathIdx1 << ", " << fPathIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPath << Endl;
   Log() << kVERBOSE << "Error estim. - event index range = [ " << fPerfIdx1 << ", " << fPerfIdx2 << " ]"
         << ", effective N(events) = " << fNEveEffPerf << Endl;
   //
   if (fRuleEnsemble->DoRules())
      Log() << kDEBUG << "Number of rules in ensemble: " << fNRules << Endl;
   else
      Log() << kDEBUG << "Rules are disabled " << Endl;

   if (fRuleEnsemble->DoLinear())
      Log() << kDEBUG << "Number of linear terms: " << fNLinear << Endl;
   else
      Log() << kDEBUG << "Linear terms are disabled " << Endl;
}
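
// Worked illustration of the index arithmetic above (example values only, not
// TMVA defaults): with neve = 1000, GDValidEveFrac = 0.5 and GDPathEveFrac = 0.5,
//    fPerfIdx2 = (UInt_t)(999*0.5) = 499,  ofs = 1000-499-1 = 500
//    -> error-estimation sample = events [500, 999]  (taken from the END)
//    fPathIdx1 = 0, fPathIdx2 = 499
//    -> path-search sample      = events [0, 499]    (taken from the BEGINNING)
// so the two samples touch but do not overlap.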

////////////////////////////////////////////////////////////////////////////////
/// initializes the ntuple

void TMVA::RuleFitParams::InitNtuple()
{
   fGDNtuple = new TTree("MonitorNtuple_RuleFitParams","RuleFit path search");
   fGDNtuple->Branch("risk",    &fNTRisk,      "risk/D");
   fGDNtuple->Branch("error",   &fNTErrorRate, "error/D");
   fGDNtuple->Branch("nuval",   &fNTNuval,     "nuval/D");
   fGDNtuple->Branch("coefrad", &fNTCoefRad,   "coefrad/D");
   fGDNtuple->Branch("offset",  &fNTOffset,    "offset/D");
   //
   fNTCoeff    = (fNRules >0 ? new Double_t[fNRules]  : 0);
   fNTLinCoeff = (fNLinear>0 ? new Double_t[fNLinear] : 0);

   for (UInt_t i=0; i<fNRules; i++) {
      fGDNtuple->Branch(Form("a%d",i+1),&fNTCoeff[i],Form("a%d/D",i+1));
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fGDNtuple->Branch(Form("b%d",i+1),&fNTLinCoeff[i],Form("b%d/D",i+1));
   }
}

////////////////////////////////////////////////////////////////////////////////
/// evaluate the average of each variable and f(x) in the given range

void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
                                           std::vector<Double_t> &avsel,
                                           std::vector<Double_t> &avrul )
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
   }
   //
   avsel.clear();
   avrul.clear();
   //
   if (fNLinear>0) avsel.resize(fNLinear,0);
   if (fNRules>0)  avrul.resize(fNRules,0);
   const std::vector<UInt_t> *eventRuleMap=0;
   Double_t ew;
   Double_t sumew=0;
   //
   // Loop over events and calculate average of linear terms (normalised) and rule response.
   //
   if (fRuleEnsemble->IsRuleMapOK()) { // MakeRuleMap() has been called
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
         }
         // loop over rules
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         for (UInt_t r=0; r<nrules; r++) {
            avrul[(*eventRuleMap)[r]] += ew;
         }
      }
   }
   else { // MakeRuleMap() has not yet been called
      const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
      for ( UInt_t i=ind1; i<ind2+1; i++) {
         ew = fRuleFit->GetTrainingEventWeight(i);
         sumew += ew;
         // first cache rule/lin response
         /* Double_t val = */ fRuleEnsemble->EvalLinEvent(*((*events)[i]));
         /* val = */ fRuleEnsemble->EvalEvent(*((*events)[i]));
         // loop over linear terms
         for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
            avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
         }
         // loop over rules
         for (UInt_t r=0; r<fNRules; r++) {
            avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
         }
      }
   }
   // average variable
   for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
      avsel[sel] = avsel[sel] / sumew;
   }
   // average rule response, excl. coefficient
   for (UInt_t r=0; r<fNRules; r++) {
      avrul[r] = avrul[r] / sumew;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of the squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( const Event& e ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
   //
   return diff*diff*e.GetWeight();
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of the squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx ) const
{
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}

////////////////////////////////////////////////////////////////////////////////
/// Implementation of the squared-error ramp loss function (eq 39,40 in ref 1).
/// This is used for binary classification where y = {+1,-1} for (sig,bkg).

Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau ) const
{
   Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
   Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
   Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
   //
   return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
}
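
// The three overloads above differ only in how the score F and the event
// weight are obtained. In formula form (a sketch of eq 39,40 in ref 1, not a
// verbatim quote): with y in {+1,-1} and the "ramp" H(F) = max(-1,min(1,F)),
//    L(y,F) = w * (y - H(F))^2
// so a correctly classified event with |F| >= 1 contributes zero loss, e.g.
//    y = +1, F =  1.7  ->  H =  1.0,  L = 0
//    y = +1, F = -0.2  ->  H = -0.2,  L = w*1.44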

////////////////////////////////////////////////////////////////////////////////
/// risk assessment

Double_t TMVA::RuleFitParams::Risk( UInt_t ind1, UInt_t ind2, Double_t neff ) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// risk assessment for tau model <itau>

Double_t TMVA::RuleFitParams::Risk( UInt_t ind1, UInt_t ind2, Double_t neff, UInt_t itau ) const
{
   UInt_t neve = ind2-ind1+1;
   if (neve<1) {
      Log() << kFATAL << "<Risk> Invalid start/end indices! BUG!!!" << Endl;
   }
   Double_t rval=0;
   //
   // const std::vector<Event *> *events = &(fRuleFit->GetTrainingEvents());
   for ( UInt_t i=ind1; i<ind2+1; i++) {
      rval += LossFunction(i,itau);
   }
   rval = rval/neff;

   return rval;
}

////////////////////////////////////////////////////////////////////////////////
/// This is the "lasso" penalty.
/// To be used for regression.
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::Penalty() const
{
   Log() << kWARNING << "<Penalty> Using unverified code! Check!" << Endl;
   Double_t rval=0;
   const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
   for (UInt_t i=0; i<fNRules; i++) {
      rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      rval += TMath::Abs((*lincoeff)[i]);
   }
   return rval;
}
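
// In formula form, the quantity computed above is the lasso penalty
//    P = sum_k |a_k| + sum_j |b_j|
// over the rule coefficients a_k and the linear-term coefficients b_j.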

////////////////////////////////////////////////////////////////////////////////
/// Initialize GD path search

void TMVA::RuleFitParams::InitGD()
{
   if (fGDNTau<2) {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   if (fGDTau<0.0) {
      // fGDNTau = 50; already set in MethodRuleFit
      fGDTauScan = 1000;
      fGDTauMin  = 0.0;
      fGDTauMax  = 1.0;
   }
   else {
      fGDNTau    = 1;
      fGDTauScan = 0;
   }
   // set all taus
   fGDTauVec.clear();
   fGDTauVec.resize( fGDNTau );
   if (fGDNTau==1) {
      fGDTauVec[0] = fGDTau;
   }
   else {
      // set tau vector - TODO: make a smarter choice of range in tau
      Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         fGDTauVec[itau] = static_cast<Double_t>(itau)*dtau + fGDTauMin;
         if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
      }
   }
   // initialize path search vectors

   fGradVec.clear();
   fGradVecLin.clear();
   fGradVecTst.clear();
   fGradVecLinTst.clear();
   fGDErrTst.clear();
   fGDErrTstOK.clear();
   fGDOfsTst.clear();
   fGDCoefTst.clear();
   fGDCoefLinTst.clear();
   //
   // rules
   //
   fGDCoefTst.resize(fGDNTau);
   fGradVec.resize(fNRules,0);
   fGradVecTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecTst[i].resize(fNRules,0);
      fGDCoefTst[i].resize(fNRules,0);
   }
   //
   // linear terms
   //
   fGDCoefLinTst.resize(fGDNTau);
   fGradVecLin.resize(fNLinear,0);
   fGradVecLinTst.resize(fGDNTau);
   for (UInt_t i=0; i<fGDNTau; i++) {
      fGradVecLinTst[i].resize(fNLinear,0);
      fGDCoefLinTst[i].resize(fNLinear,0);
   }
   //
   // error, coefs etc
   //
   fGDErrTst.resize(fGDNTau,0);
   fGDErrTstOK.resize(fGDNTau,kTRUE);
   fGDOfsTst.resize(fGDNTau,0);
   fGDNTauTstOK = fGDNTau;
   //
   // calculate average selectors and rule responses for the path sample size
   //
}
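
// Example of the tau grid built above: with fGDTauMin = 0.0, fGDTauMax = 1.0
// and fGDNTau = 51 (the constructor defaults),
//    dtau = 1.0/50 = 0.02
// so the scanned taus are 0.00, 0.02, 0.04, ..., 1.00, matching the default
// precision fGDTauPrec = 0.02 set in the constructor.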

////////////////////////////////////////////////////////////////////////////////
/// This finds the cutoff parameter tau by scanning several different paths

Int_t TMVA::RuleFitParams::FindGDTau()
{
   if (fGDNTau<2)     return 0;
   if (fGDTauScan==0) return 0;

   if (fGDOfsTst.size()<1)
      Log() << kFATAL << "BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
   //
   Log() << kINFO << "Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
   //
   // Find how many points to scan and how often to calculate the error
   UInt_t nscan    = fGDTauScan; //std::min(static_cast<Int_t>(fGDTauScan),fGDNPathSteps);
   UInt_t netst    = std::min(nscan,UInt_t(100));
   UInt_t nscanned = 0;
   //
   //--------------------
   // loop over the paths
   //--------------------
   // The MAXIMUM number of loops is given by nscan.
   // At each loop, the paths that are far away from the minimum
   // are rejected. Hence at each check (every netst events), the number
   // of paths searched is reduced.
   // The maximum 'distance' from the minimum error rate is
   // 1 sigma. See RiskPerfTst() for details.
   //
   Bool_t doloop=kTRUE;
   UInt_t ip=0;
   UInt_t itauMin=0;
   Timer timer( nscan, "RuleFit" );
   while (doloop) {
      // make gradvec
      MakeTstGradientVector();
      // update coefs
      UpdateTstCoefficients();
      // estimate error and do the sum
      // do this at index=0, netst-1, 2*netst-1 ...
      nscanned++;
      if ( (ip==0) || ((ip+1)%netst==0) ) {
         // ErrorRateRocTst( );
         itauMin = RiskPerfTst();
         Log() << kVERBOSE << Form("%4d",ip+1) << ". tau = " << Form("%4.4f",fGDTauVec[itauMin])
               << " => error rate = " << fGDErrTst[itauMin] << Endl;
      }
      ip++;
      doloop = ((ip<nscan) && (fGDNTauTstOK>3));
      gFIRSTTST = kFALSE;
      if (Log().GetMinType()>kVERBOSE)
         timer.DrawProgressBar(ip);
   }
   //
   // Set tau and coefs
   // Downscale tau slightly in order to avoid numerical problems
   //
   if (nscanned==0) {
      Log() << kERROR << "<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
   }
   fGDTau = fGDTauVec[itauMin];
   fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
   fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
   fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
   Log() << kINFO << "Best path found with tau = " << Form("%4.4f",fGDTau)
         << " after " << timer.GetElapsedTime() << "  " << Endl;

   return nscan;
}

////////////////////////////////////////////////////////////////////////////////
/// The following finds the gradient directed path in parameter space.
/// More work is needed... FT, 24/9/2006
///
/// The algorithm is currently as follows (if not otherwise stated, the sample
/// used below is [fPathIdx1,fPathIdx2]):
///
/// 1. Set the offset to -average(y(true)) and all coefs to 0 => average of F(x)==0
/// 2. FindGDTau(): start scanning using several paths defined by different tau;
///    choose the tau yielding the best path
/// 3. start scanning along the chosen path
/// 4. check the error rate at a given frequency
///    (data used for the check: [fPerfIdx1,fPerfIdx2])
/// 5. stop when any of the following conditions is fulfilled:
///    1. loop index == fGDNPathSteps
///    2. error > fGDErrScale*errmin
///    3. only in DEBUG mode: risk is not monotonically decreasing
///
/// The algorithm will warn if:
/// 1. the error rate was still decreasing when the loop finished -> increase fGDNPathSteps!
/// 2. the minimum was found at an early stage -> decrease fGDPathStep
/// 3. DEBUG: risk > previous risk -> entered chaotic region (regularization is too small)

void TMVA::RuleFitParams::MakeGDPath()
{
   Log() << kINFO << "GD path scan - the scan stops when the max num. of steps is reached or a min is found"
         << Endl;
   Log() << kVERBOSE << "Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
   Log() << kVERBOSE << "Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;

   // check if debug mode
   const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
   const Bool_t isDebug   = (Log().GetMinType()<=kDEBUG);

   // init GD parameters and clear coeff vectors
   InitGD();

   // evaluate average response of rules/linear terms (with event weights)
   EvaluateAveragePath();
   EvaluateAveragePerf();

   // initial estimate; all other a(i) are zero
   Log() << kVERBOSE << "Creating GD path"  << Endl;
   Log() << kVERBOSE << "  N(steps)     = " << fGDNPathSteps << Endl;
   Log() << kVERBOSE << "  step         = " << fGDPathStep   << Endl;
   Log() << kVERBOSE << "  N(tau)       = " << fGDNTau       << Endl;
   Log() << kVERBOSE << "  N(tau steps) = " << fGDTauScan    << Endl;
   Log() << kVERBOSE << "  tau range    = [ " << fGDTauVec[0] << " , " << fGDTauVec[fGDNTau-1] << " ]" << Endl;

   // init ntuple
   if (isDebug) InitNtuple();

   // DEBUG: risk scan
   Int_t    nbadrisk=0;  // number of points where risk(i+1)>risk(i)
   Double_t trisk=0;     // time per risk evaluation
   Double_t strisk=0;    // total time
   Double_t rprev=1e32;  // previous risk

   // parameters set at point with min error
   Double_t errmin=1e32; // min error
   // Double_t riskMin=0; // risk
   Int_t    indMin=-1;   // index
   std::vector<Double_t> coefsMin;    // rule coefs
   std::vector<Double_t> lincoefsMin; // linear coefs
   Double_t offsetMin;   // offset


   // DEBUG: timing
   clock_t  t0=0;
   Double_t tgradvec;
   Double_t tupgrade;
   Double_t tperf;
   Double_t stgradvec=0;
   Double_t stupgrade=0;
   Double_t stperf=0;

   // linear regression to estimate slope of error rate evolution
   const UInt_t npreg=5;
   std::vector<Double_t> valx;
   std::vector<Double_t> valy;
   std::vector<Double_t> valxy;

   // loop related
   Bool_t docheck;          // true if an error rate check is to be done
   Int_t  iloop=0;          // loop index
   Bool_t found=kFALSE;     // true if minimum is found
   Bool_t riskFlat=kFALSE;  // DEBUG: flag is true if risk evolution behaves badly
   Bool_t done = kFALSE;    // flag that the scan is done

   // calculate how often to check error rate
   int imod = fGDNPathSteps/100;
   if (imod<100) imod = std::min(100,fGDNPathSteps);
   if (imod>100) imod=100;

   // reset coefficients
   fAverageTruth = -CalcAverageTruth();
   offsetMin     = fAverageTruth;
   fRuleEnsemble->SetOffset(offsetMin);
   fRuleEnsemble->ClearCoefficients(0);
   fRuleEnsemble->ClearLinCoefficients(0);
   for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
      fGDOfsTst[i] = offsetMin;
   }
   Log() << kVERBOSE << "Obtained initial offset = " << offsetMin << Endl;

   // find the best tau - returns the number of steps performed in scan
   Int_t nprescan = FindGDTau();

   //
   //
   // calculate F*
   //
   // CalcFStar(fPerfIdx1, fPerfIdx2);
   //

   // set some ntuple values
   fNTRisk = rprev;
   fNTCoefRad = -1.0;
   fNTErrorRate = 0;

   // a local flag indicating for what reason the search was stopped
   Int_t stopCondition=0;

   Log() << kINFO << "Fitting model..." << Endl;
   // start loop with timer
   Timer timer( fGDNPathSteps, "RuleFit" );
   Log() << kWARNING;
   while (!done) {
      // Make gradient vector (eq 44, ref 1)
      if (isVerbose) t0 = clock();
      MakeGradientVector();
      if (isVerbose) {
         tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stgradvec += tgradvec;
      }

      // Calculate the direction in parameter space (eq 25, ref 1) and update coeffs (eq 22, ref 1)
      if (isVerbose) t0 = clock();
      UpdateCoefficients();
      if (isVerbose) {
         tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         stupgrade += tupgrade;
      }

      // don't check error rate every loop
      docheck = ((iloop==0) ||((iloop+1)%imod==0));

      if (docheck) {
         // draw progressbar only if not debug
         if (!isVerbose)
            timer.DrawProgressBar(iloop);
         fNTNuval = Double_t(iloop)*fGDPathStep;
         fNTRisk = 0.0;

         // check risk evolution

         if (isDebug) FillCoefficients();
         fNTCoefRad = fRuleEnsemble->CoefficientRadius();

         // calculate risk
         t0 = clock();
         fNTRisk = RiskPath();
         trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
         strisk += trisk;
         //
         // Check for an increase in risk.
         // Such an increase would imply that the regularization is too small.
         // Stop the iteration if this happens.
         //
         if (fNTRisk>=rprev) {
            if (fNTRisk>rprev) {
               nbadrisk++;
               Log() << "Risk(i+1)>=Risk(i) in path" << Endl;
               riskFlat=(nbadrisk>3);
               if (riskFlat) {
                  Log() << kWARNING << "Chaotic behaviour of risk evolution" << Endl;
                  Log() << "--- STOPPING MINIMISATION ---" << Endl;
                  Log() << "This may be OK if minimum is already found" << Endl;
               }
            }
         }
         rprev = fNTRisk;
         //
         // Estimate the error rate using cross validation.
         // Well, not quite full cross validation since we only
         // use ONE model.
         //
         if (isVerbose) t0 = clock();
         fNTErrorRate = 0;

         // Check error rate
         Double_t errroc; //= ErrorRateRoc();
         Double_t riskPerf = RiskPerf();
         // Double_t optimism = Optimism();
         //
         errroc = riskPerf;
         //
         fNTErrorRate = errroc;
         //
         if (isVerbose) {
            tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
            stperf +=tperf;
         }
         //
         // Always take the last min.
         // For each step the risk is reduced.
         //
         if (fNTErrorRate<=errmin) {
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
         //
         // check slope of last couple of points
         //
         if (valx.size()==npreg) {
            valx.erase(valx.begin());
            valy.erase(valy.begin());
            valxy.erase(valxy.begin());
         }
         valx.push_back(fNTNuval);
         valy.push_back(fNTErrorRate);
         valxy.push_back(fNTErrorRate*fNTNuval);

         gFIRSTORG = kFALSE;

         //
         if (isDebug) fGDNtuple->Fill();
         if (isVerbose) {
            Log() << kVERBOSE << "ParamsIRE : "
                  << std::setw(10)
                  << Form("%8d",iloop+1) << " "
                  << Form("%4.4f",fNTRisk) << " "
                  << Form("%4.4f",riskPerf) << " "
                  << Form("%4.4f",fNTRisk+riskPerf) << " "
                  // << Form("%4.4f",fsigave+fbkgave) << " "
                  // << Form("%4.4f",fsigave) << " "
                  // << Form("%4.4f",fsigrms) << " "
                  // << Form("%4.4f",fbkgave) << " "
                  // << Form("%4.4f",fbkgrms) << " "

                  // << Form("%4.4f",fRuleEnsemble->CoefficientRadius())
                  << Endl;
         }
      }
      iloop++;
      // Stop iteration under various conditions
      // * The condition R(i+1)<R(i) is no longer true (the implicit regularization is too weak)
      // * The current error estimate is > factor*errmin (factor = 1.1)
      // * We have reached the last step...
      Bool_t endOfLoop = (iloop==fGDNPathSteps);
      if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
         if (riskFlat) {
            stopCondition = 1;
         }
         else if (endOfLoop) {
            stopCondition = 2;
         }
         if (indMin<0) {
            Log() << kWARNING << "BUG TRAP: should not be here - still, this bug is harmless;)" << Endl;
            errmin = fNTErrorRate;
            // riskMin = fNTRisk;
            indMin = iloop;
            fRuleEnsemble->GetCoefficients(coefsMin);
            lincoefsMin = fRuleEnsemble->GetLinCoefficients();
            offsetMin   = fRuleEnsemble->GetOffset();
         }
         found = kTRUE;
      }
      done = (found);
   }
   Log() << Endl;
   Log() << kINFO << "Minimisation elapsed time : " << timer.GetElapsedTime() << "  " << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << "Found minimum at step " << indMin+1 << " with error = " << errmin << Endl;
   Log() << kINFO << "Reason for ending loop: ";
   switch (stopCondition) {
   case 0:
      Log() << kINFO << "clear minimum found";
      break;
   case 1:
      Log() << kINFO << "chaotic behaviour of risk";
      break;
   case 2:
      Log() << kINFO << "end of loop reached";
      break;
   default:
      Log() << kINFO << "unknown!";
      break;
   }
   Log() << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;

   // check if early minimum - might indicate too large a step size
   if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
      Log() << kWARNING << "Reached minimum early in the search" << Endl;
      Log() << "Check results and maybe decrease GDStep size" << Endl;
   }
   //
   // quick check of the sign of the slope for the last npreg points
   //
   Double_t sumx  = std::accumulate( valx.begin(), valx.end(), Double_t() );
   Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
   Double_t sumy  = std::accumulate( valy.begin(), valy.end(), Double_t() );
   Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
   if (slope<0) {
      Log() << kINFO << "The error rate was still decreasing at the end of the path" << Endl;
      Log() << kINFO << "Increase number of steps (GDNSteps)." << Endl;
   }
   //
   // set coefficients
   //
   if (found) {
      fRuleEnsemble->SetCoefficients( coefsMin );
      fRuleEnsemble->SetLinCoefficients( lincoefsMin );
      fRuleEnsemble->SetOffset( offsetMin );
   }
   else {
      Log() << kFATAL << "BUG TRAP: minimum not found in MakeGDPath()" << Endl;
   }

   //
   // print timing info (VERBOSE mode)
   //
   if (isVerbose) {
      Double_t stloop = strisk + stupgrade + stgradvec + stperf;
      Log() << kVERBOSE << "Timing per loop (ms):" << Endl;
      Log() << kVERBOSE << "   gradvec = " << 1000*stgradvec/iloop << Endl;
      Log() << kVERBOSE << "   upgrade = " << 1000*stupgrade/iloop << Endl;
      Log() << kVERBOSE << "   risk    = " << 1000*strisk/iloop << Endl;
      Log() << kVERBOSE << "   perf    = " << 1000*stperf/iloop << Endl;
      Log() << kVERBOSE << "   loop    = " << 1000*stloop/iloop << Endl;
      //
      Log() << kVERBOSE << "   GDInit     = " << 1000*gGDInit/iloop << Endl;
      Log() << kVERBOSE << "   GDPtr      = " << 1000*gGDPtr/iloop << Endl;
      Log() << kVERBOSE << "   GDEval     = " << 1000*gGDEval/iloop << Endl;
      Log() << kVERBOSE << "   GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
      Log() << kVERBOSE << "   GDNorm     = " << 1000*gGDNorm/iloop << Endl;
      Log() << kVERBOSE << "   GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
      Log() << kVERBOSE << "   GDLinLoop  = " << 1000*gGDLinLoop/iloop << Endl;
   }
   //
   // write ntuple (DEBUG)
   if (isDebug) fGDNtuple->Write();
}
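
// For orientation, the loop above implements, in sketch form (using the helper
// names of this file; simplified, error handling omitted):
//
//    InitGD(); EvaluateAveragePath(); EvaluateAveragePerf();
//    offset = -CalcAverageTruth();           // step 1: F(x) averages to 0
//    FindGDTau();                            // step 2: pick regularization tau
//    while (!done) {                         // step 3: follow the path
//       MakeGradientVector();                //   eq 44, ref 1
//       UpdateCoefficients();                //   step coefs with |grad| >= tau*max
//       if (time to check) {                 // step 4: every imod steps
//          risk = RiskPath(); err = RiskPerf();
//          remember the coefficients at the smallest err seen so far;
//       }
//       done = chaotic risk || err > fGDErrScale*errmin || iloop == fGDNPathSteps;
//    }
//    restore the coefficients recorded at the error minimum.   // step 5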

////////////////////////////////////////////////////////////////////////////////
/// helper function to store the rule coefficients in local arrays

void TMVA::RuleFitParams::FillCoefficients()
{
   fNTOffset = fRuleEnsemble->GetOffset();
   //
   for (UInt_t i=0; i<fNRules; i++) {
      fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
   }
   for (UInt_t i=0; i<fNLinear; i++) {
      fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates F* (optimum scoring function) for all events for the given sets.
/// The result is used in ErrorRateReg().
/// --- NOT USED ---

void TMVA::RuleFitParams::CalcFStar()
{
   Log() << kWARNING << "<CalcFStar> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<CalcFStar> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   fFstar.clear();
   std::vector<Double_t> fstarSorted;
   Double_t fstarVal;
   // loop over all events and estimate F* for each event
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      fstarVal = fRuleEnsemble->FStar(e);
      fFstar.push_back(fstarVal);
      fstarSorted.push_back(fstarVal);
      if (TMath::IsNaN(fstarVal)) Log() << kFATAL << "F* is NAN!" << Endl;
   }
   // sort F* and find the median
   std::sort( fstarSorted.begin(), fstarSorted.end() );
   UInt_t ind = neve/2;
   if (neve&1) { // odd number of events - take the middle element
      fFstarMedian = fstarSorted[ind];
   }
   else { // even - average the two middle elements
      fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// implementation of eq. 7.17 in Hastie, Tibshirani & Friedman's book;
/// this is the covariance between the estimated response yhat and the
/// true value y.
/// NOT REALLY SURE IF THIS IS CORRECT!
/// --- THIS IS NOT USED ---

Double_t TMVA::RuleFitParams::Optimism()
{
   Log() << kWARNING << "<Optimism> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<Optimism> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumy=0;
   Double_t sumyhat=0;
   Double_t sumyhaty=0;
   Double_t sumw2=0;
   Double_t sumw=0;
   Double_t yhat;
   Double_t y;
   Double_t w;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      yhat = fRuleEnsemble->EvalEvent(i); // evaluated using the model
      y    = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0); // the truth
      w    = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf; // the weight, rescaled so that the sum is 1
      sumy     += w*y;
      sumyhat  += w*yhat;
      sumyhaty += w*yhat*y;
      sumw2    += w*w;
      sumw     += w;
   }
   Double_t div = 1.0-sumw2;
   Double_t cov = sumyhaty - sumyhat*sumy;
   return 2.0*cov/div;
}
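
// In formula form (a sketch of eq 7.17 in Hastie, Tibshirani & Friedman, as
// interpreted by the code above): with weights normalised so sum_i w_i = 1,
//    optimism = 2 * Cov(yhat,y) / (1 - sum_i w_i^2)
//    Cov(yhat,y) = sum_i w_i*yhat_i*y_i - (sum_i w_i*yhat_i)*(sum_i w_i*y_i)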

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// This code is pretty messy at the moment; cleanup is needed.
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::ErrorRateReg()
{
   Log() << kWARNING << "<ErrorRateReg> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateReg> Invalid start/end indices!" << Endl;
   }
   if (fFstar.size()!=neve) {
      Log() << kFATAL << "--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
            << " Fstar.size() = " << fFstar.size() << " , N(events) = " << neve << Endl;
   }
   //
   Double_t sF;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdf = 0;
   Double_t sumdfmed = 0;
   //
   // A bit messy here.
   // I believe the binary error classification is appropriate here.
   // The problem is stability.
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // scaled abs error, eq 20 in RuleFit paper
      sumdf    += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
      sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
   }
   // scaled abs error, eq 20
   // This error (df) is large - need to think about how to compensate...
   //
   return sumdf/sumdfmed;
}

////////////////////////////////////////////////////////////////////////////////
///
/// Estimates the error rate with the current set of parameters.
/// It uses a binary estimate of (y-F*(x)):
///    (y-F*(x)) = (number of events where sign(F)!=sign(y))/Neve
///    y = {+1 if event is signal, -1 otherwise}
/// --- NOT USED ---

Double_t TMVA::RuleFitParams::ErrorRateBin()
{
   Log() << kWARNING << "<ErrorRateBin> Using unverified code! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateBin> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sumdfbin = 0;
   Double_t dneve = Double_t(neve);
   Int_t signF, signy;
   Double_t sF;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent( e );
      // Double_t sFstar = fRuleEnsemble->FStar(e); // THIS CAN BE CALCULATED ONCE!
      signF = (sF>0 ? +1:-1);
      // signy = (sFStar>0 ? +1:-1);
      signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
      sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
   }
   Double_t f = sumdfbin/dneve;
   // Double_t df = f*TMath::Sqrt((1.0/sumdfbin) + (1.0/dneve));
   return f;
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.

Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
                                               std::vector<Double_t> & sFbkg )
{
   std::sort(sFsig.begin(), sFsig.end());
   std::sort(sFbkg.begin(), sFbkg.end());
   const Double_t minsig = sFsig.front();
   const Double_t minbkg = sFbkg.front();
   const Double_t maxsig = sFsig.back();
   const Double_t maxbkg = sFbkg.back();
   const Double_t minf = std::min(minsig,minbkg);
   const Double_t maxf = std::max(maxsig,maxbkg);
   const Int_t    nsig = Int_t(sFsig.size());
   const Int_t    nbkg = Int_t(sFbkg.size());
   const Int_t    np   = std::min((nsig+nbkg)/4,50);
   const Double_t df   = (maxf-minf)/(np-1);
   //
   // calculate area under rejection/efficiency curve
   //
   Double_t fcut;
   std::vector<Double_t>::const_iterator indit;
   Int_t nrbkg;
   Int_t nesig;
   Int_t pnesig=0;
   Double_t rejb=0;
   Double_t effs=1.0;
   Double_t prejb=0;
   Double_t peffs=1.0;
   // Double_t drejb;
   Double_t deffs;
   Double_t area=0;
   Int_t npok=0;
   //
   // loop over range of F [minf,maxf]
   //
   for (Int_t i=0; i<np; i++) {
      fcut = minf + df*Double_t(i);
      indit = std::find_if(sFsig.begin(), sFsig.end(),
                           std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
      nesig = sFsig.end()-indit; // number of sig accepted with F>cut
      if (TMath::Abs(pnesig-nesig)>0) {
         npok++;
         indit = std::find_if(sFbkg.begin(), sFbkg.end(),
                              std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
         nrbkg = indit-sFbkg.begin(); // number of bkg rejected with F>cut
         rejb = Double_t(nrbkg)/Double_t(nbkg);
         effs = Double_t(nesig)/Double_t(nsig);
         //
         // drejb = rejb-prejb;
         deffs = effs-peffs;
         area += 0.5*TMath::Abs(deffs)*(rejb+prejb); // trapezoid
         prejb = rejb;
         peffs = effs;
      }
      pnesig = nesig;
   }
   area += 0.5*(1+rejb)*effs; // extrapolate to the end point

   return (1.0-area);
}
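
// Sketch of the quantity computed above: scanning a cut F > fcut over np
// equidistant points between the smallest and largest score,
//    effs = N_sig(F > fcut)/N_sig     (signal efficiency)
//    rejb = N_bkg(F <= fcut)/N_bkg    (background rejection)
// the area under the rejb-vs-effs curve is accumulated with the trapezoid
// rule, area += 0.5*|deffs|*(rejb+prejb), and the return value is 1-area,
// so perfect separation gives 0 and a random ordering gives about 0.5.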

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
/// This works but is less efficient than calculating the Risk using RiskPerf().

Double_t TMVA::RuleFitParams::ErrorRateRoc()
{
   Log() << kWARNING << "<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRoc> Invalid start/end indices!" << Endl;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   Double_t sF;
   //
   std::vector<Double_t> sFsig;
   std::vector<Double_t> sFbkg;
   Double_t sumfsig=0;
   Double_t sumfbkg=0;
   Double_t sumf2sig=0;
   Double_t sumf2bkg=0;
   //
   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      const Event& e = *(*events)[i];
      sF = fRuleEnsemble->EvalEvent(i); // * fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
         sFsig.push_back(sF);
         sumfsig  += sF;
         sumf2sig += sF*sF;
      }
      else {
         sFbkg.push_back(sF);
         sumfbkg  += sF;
         sumf2bkg += sF*sF;
      }
   }
   fsigave = sumfsig/sFsig.size();
   fbkgave = sumfbkg/sFbkg.size();
   fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
   fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
   //
   return ErrorRateRocRaw( sFsig, sFbkg );
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters.
/// It calculates the area under the bkg rejection vs signal efficiency curve.
/// The value returned is 1-area.
///
/// See comment under ErrorRateRoc().

void TMVA::RuleFitParams::ErrorRateRocTst()
{
   Log() << kWARNING << "<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<ErrorRateRocTst> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   //
   // std::vector<Double_t> sF;
   Double_t sF;
   std::vector< std::vector<Double_t> > sFsig;
   std::vector< std::vector<Double_t> > sFbkg;
   //
   sFsig.resize( fGDNTau );
   sFbkg.resize( fGDNTau );
   // sF.resize( fGDNTau );

   for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *(*events)[i], fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
            sFsig[itau].push_back(sF);
         }
         else {
            sFbkg[itau].push_back(sF);
         }
      }
   }
   Double_t err;

   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
      fGDErrTst[itau] = err;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Estimates the error rate with the current set of parameters,
/// using the <Perf> subsample.
/// Returns the tau index giving the lowest error.

UInt_t TMVA::RuleFitParams::RiskPerfTst()
{
   UInt_t neve = fPerfIdx2-fPerfIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<RiskPerfTst> Invalid start/end indices!" << Endl;
      return 0;
   }
   //
   Double_t sumx  = 0;
   Double_t sumx2 = 0;
   Double_t maxx  = -100.0;
   Double_t minx  = 1e30;
   UInt_t itaumin = 0;
   UInt_t nok=0;
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         nok++;
         fGDErrTst[itau] = RiskPerf(itau);
         sumx  += fGDErrTst[itau];
         sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
         if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
         if (fGDErrTst[itau]<minx) {
            minx=fGDErrTst[itau];
            itaumin = itau;
         }
      }
   }
   Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
   Double_t maxacc = minx+sigx;
   //
   if (nok>0) {
      nok = 0;
      for (UInt_t itau=0; itau<fGDNTau; itau++) {
         if (fGDErrTstOK[itau]) {
            if (fGDErrTst[itau] > maxacc) {
               fGDErrTstOK[itau] = kFALSE;
            }
            else {
               nok++;
            }
         }
      }
   }
   fGDNTauTstOK = nok;
   Log() << kVERBOSE << "TAU: "
         << itaumin << "   "
         << nok << "   "
         << minx << "   "
         << maxx << "   "
         << sigx << Endl;
   //
   return itaumin;
}
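
// Numerical illustration of the 1-sigma pruning above (invented values):
// with surviving path errors {0.20, 0.21, 0.35, 0.40}, minx = 0.20 and
// sigx ~ 0.1, so maxacc ~ 0.3 and the last two paths are flagged
// fGDErrTstOK = kFALSE and skipped in subsequent scan steps.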

////////////////////////////////////////////////////////////////////////////////
/// make test gradient vector for all tau;
/// same algorithm as MakeGradientVector()

void TMVA::RuleFitParams::MakeTstGradientVector()
{
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeTstGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         for (UInt_t ir=0; ir<fNRules; ir++) {
            fGradVecTst[itau][ir]=0;
         }
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLinTst[itau][il]=0;
         }
      }
   }
   //
   // Double_t val; // temp store
   Double_t sF; // score function value
   Double_t r;  // eq 35, ref 1
   Double_t y;  // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   // Loop over all events
   //
   UInt_t nsfok=0;
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];
      UInt_t nrules=0;
      if (fRuleEnsemble->DoRules()) {
         eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
         nrules = (*eventRuleMap).size();
      }
      for (UInt_t itau=0; itau<fGDNTau; itau++) { // loop over tau
         // if (itau==0) sF = fRuleEnsemble->EvalEvent( *e, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         // else         sF = fRuleEnsemble->EvalEvent( fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
         if (fGDErrTstOK[itau]) {
            sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
            if (TMath::Abs(sF)<1.0) {
               nsfok++;
               r = 0;
               y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
               r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
               // rule gradient vector
               for (UInt_t ir=0; ir<nrules; ir++) {
                  rind = (*eventRuleMap)[ir];
                  fGradVecTst[itau][rind] += r;
               }
               // linear terms
               for (UInt_t il=0; il<fNLinear; il++) {
                  fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
               }
            } // if (TMath::Abs(F)<xxx)
         }
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset
/// for all taus. TODO: do not need the index range!

void TMVA::RuleFitParams::UpdateTstCoefficients()
{
   Double_t maxr, maxl, cthresh, val;
   // loop over all taus
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         // find max gradient
         maxr = ( (fNRules>0 ?
                   TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
         maxl = ( (fNLinear>0 ?
                   TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );

         // Use the maximum as a threshold
         Double_t maxv = (maxr>maxl ? maxr:maxl);
         cthresh = maxv * fGDTauVec[itau];

         // Add to offset, if gradient is large enough:
         // Loop over the gradient vector and move to next set of coefficients
         // size of GradVec (and GradVecLin) should be 0 if learner is disabled
         //
         // Step-size is divided by 10 when looking for the best path.
         //
         if (maxv>0) {
            const Double_t stepScale=1.0;
            for (UInt_t i=0; i<fNRules; i++) {
               val = fGradVecTst[itau][i];

               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
               }
            }
            // Loop over the gradient vector for the linear part and move to next set of coefficients
            for (UInt_t i=0; i<fNLinear; i++) {
               val = fGradVecLinTst[itau][i];
               if (TMath::Abs(val)>=cthresh) {
                  fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
               }
            }
         }
      }
   }
   // set the offset - should be outside the itau loop!
   CalcTstAverageResponse();
}

////////////////////////////////////////////////////////////////////////////////
/// make gradient vector

void TMVA::RuleFitParams::MakeGradientVector()
{
   clock_t t0;
   // clock_t t10;
   t0 = clock();
   //
   UInt_t neve = fPathIdx2-fPathIdx1+1;
   if (neve<1) {
      Log() << kFATAL << "<MakeGradientVector> Invalid start/end indices!" << Endl;
      return;
   }
   //
   const Double_t norm = 2.0/fNEveEffPath;
   //
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());

   // Clear gradient vectors
   for (UInt_t ir=0; ir<fNRules; ir++) {
      fGradVec[ir]=0;
   }
   for (UInt_t il=0; il<fNLinear; il++) {
      fGradVecLin[il]=0;
   }
   //
   // Double_t val; // temp store
   Double_t sF; // score function value
   Double_t r;  // eq 35, ref 1
   Double_t y;  // true score (+1 or -1)
   const std::vector<UInt_t> *eventRuleMap=0;
   UInt_t rind;
   //
   gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;

   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      const Event *e = (*events)[i];

      // t0 = clock(); //DEB
      sF = fRuleEnsemble->EvalEvent( i ); // should not contain the weight
      // gGDEval += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      if (TMath::Abs(sF)<1.0) {
         UInt_t nrules=0;
         if (fRuleEnsemble->DoRules()) {
            eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
            nrules = (*eventRuleMap).size();
         }
         y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
         r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
         // rule gradient vector
         for (UInt_t ir=0; ir<nrules; ir++) {
            rind = (*eventRuleMap)[ir];
            fGradVec[rind] += r;
         }
         // gGDRuleLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
         // linear terms
         // t0 = clock(); //DEB
         for (UInt_t il=0; il<fNLinear; il++) {
            fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
         }
         // gGDLinLoop += Double_t(clock()-t0)/CLOCKS_PER_SEC;
      } // if (TMath::Abs(F)<xxx)
   }
}
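
// In formula form (a sketch of eq 35/44 in ref 1, as implemented above): for
// events with |F(x_i)| < 1,
//    grad_k = (2/N_eff) * sum_i w_i * (y_i - F(x_i)) * r_k(x_i)
// where r_k(x_i) = 1 if rule k fires for event i and 0 otherwise; the linear
// terms use the (normalised) raw selector value in place of r_k.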

////////////////////////////////////////////////////////////////////////////////
/// Establish maximum gradient for rules, linear terms and the offset

void TMVA::RuleFitParams::UpdateCoefficients()
{
   Double_t maxr = ( (fRuleEnsemble->DoRules() ?
                      TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
   Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
                      TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
   // Use the maximum as a threshold
   Double_t maxv = (maxr>maxl ? maxr:maxl);
   Double_t cthresh = maxv * fGDTau;

   Double_t useRThresh;
   Double_t useLThresh;
   //
   // Choose thresholds.
   //
   useRThresh = cthresh;
   useLThresh = cthresh;

   Double_t gval, lval, coef, lcoef;

   // Add to offset, if gradient is large enough:
   // Loop over the gradient vector and move to next set of coefficients
   // size of GradVec (and GradVecLin) should be 0 if learner is disabled
   if (maxv>0) {
      for (UInt_t i=0; i<fGradVec.size(); i++) {
         gval = fGradVec[i];
         if (TMath::Abs(gval)>=useRThresh) {
            coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
            fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
         }
      }

      // Loop over the gradient vector for the linear part and move to next set of coefficients
      for (UInt_t i=0; i<fGradVecLin.size(); i++) {
         lval = fGradVecLin[i];
         if (TMath::Abs(lval)>=useLThresh) {
            lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
            fRuleEnsemble->SetLinCoefficient(i,lcoef);
         }
      }
      // Set the offset
      Double_t offset = CalcAverageResponse();
      fRuleEnsemble->SetOffset( offset );
   }
}
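
// Example of the thresholding above (invented numbers): with fGDTau = 0.7 and
// max|gradient| = 0.10, only coefficients whose gradient magnitude is >= 0.07
// are moved, each by fGDPathStep*gradient. tau = 0 steps every coefficient
// (ridge-like path); tau = 1 steps only the steepest ones (fully greedy,
// lasso-like path).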

////////////////////////////////////////////////////////////////////////////////
/// calc average response for all test paths - TODO: see comment under CalcAverageResponse();
/// note that 0 offset is used

void TMVA::RuleFitParams::CalcTstAverageResponse()
{
   for (UInt_t itau=0; itau<fGDNTau; itau++) {
      if (fGDErrTstOK[itau]) {
         fGDOfsTst[itau] = 0;
         for (UInt_t s=0; s<fNLinear; s++) {
            fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
         }
         for (UInt_t r=0; r<fNRules; r++) {
            fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
         }
      }
   }
   //
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average response - TODO: rewrite the bad dependency on EvaluateAverage()!
///
/// note that 0 offset is used

Double_t TMVA::RuleFitParams::CalcAverageResponse()
{
   Double_t ofs = 0;
   for (UInt_t s=0; s<fNLinear; s++) {
      ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
   }
   for (UInt_t r=0; r<fNRules; r++) {
      ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
   }
   return ofs;
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the average truth

Double_t TMVA::RuleFitParams::CalcAverageTruth()
{
   if (fPathIdx2<=fPathIdx1) {
      Log() << kFATAL << "<CalcAverageTruth> Invalid start/end indices!" << Endl;
      return 0;
   }
   Double_t sum=0;
   Double_t ensig=0;
   Double_t enbkg=0;
   const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
   for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
      Double_t ew = fRuleFit->GetTrainingEventWeight(i);
      if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
      else                                                                 enbkg += ew;
      sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
   }
   Log() << kVERBOSE << "Effective number of signal / background = " << ensig << " / " << enbkg << Endl;

   return sum/fNEveEffPath;
}

////////////////////////////////////////////////////////////////////////////////

Int_t TMVA::RuleFitParams::Type( const Event * e ) const {
   return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::RuleFitParams::SetMsgType( EMsgType t ) {
   fLogger->SetMinType(t);
}