Logo ROOT  
Reference Guide
OptimizeConfigParameters.cxx
Go to the documentation of this file.
1/**********************************************************************************
2 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
3 * Package: TMVA *
4 * Class : OptimizeConfigParameters *
5 * Web : http://tmva.sourceforge.net *
6 * *
7 * Description: The OptimizeConfigParameters takes care of "scanning/fitting" *
8 * different tuning parameters in order to find the best set of *
9 * tuning paraemters which will be used in the end *
10 * *
11 * Authors (alphabetical): *
12 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
13 * *
14 * Copyright (c) 2005: *
15 * CERN, Switzerland *
16 * MPI-K Heidelberg, Germany *
17 * *
18 * Redistribution and use in source and binary forms, with or without *
19 * modification, are permitted according to the terms listed in LICENSE *
20 * (http://ttmva.sourceforge.net/LICENSE) *
21 **********************************************************************************/
22
23/*! \class TMVA::OptimizeConfigParameters
24\ingroup TMVA
25
26*/
27
29#include "TMVA/Config.h"
30#include "TMVA/DataSet.h"
31#include "TMVA/DataSetInfo.h"
32#include "TMVA/Event.h"
33#include "TMVA/IFitterTarget.h"
34#include "TMVA/FitterBase.h"
35#include "TMVA/GeneticFitter.h"
36#include "TMVA/IMethod.h"
37#include "TMVA/Interval.h"
38#include "TMVA/MethodBase.h"
39#include "TMVA/MethodFDA.h"
40#include "TMVA/MsgLogger.h"
41#include "TMVA/MinuitFitter.h"
42#include "TMVA/PDF.h"
43#include "TMVA/Tools.h"
44#include "TMVA/Types.h"
45
46#include "TDirectory.h"
47#include "TGraph.h"
48#include "TH1.h"
49#include "TH2.h"
50#include "TMath.h"
51
52#include <cstdlib>
53#include <limits>
54
55
57
58////////////////////////////////////////////////////////////////////////////////
59/// Constructor which sets either "Classification or Regression"
60
61TMVA::OptimizeConfigParameters::OptimizeConfigParameters(MethodBase * const method, std::map<TString,TMVA::Interval*> tuneParameters, TString fomType, TString optimizationFitType)
62: fMethod(method),
63 fTuneParameters(tuneParameters),
64 fFOMType(fomType),
65 fOptimizationFitType(optimizationFitType),
66 fMvaSig(NULL),
67 fMvaBkg(NULL),
68 fMvaSigFineBin(NULL),
69 fMvaBkgFineBin(NULL),
70 fNotDoneYet(kFALSE)
71{
72 std::string name = "OptimizeConfigParameters_";
73 name += std::string(GetMethod()->GetName());
74 fLogger = new MsgLogger(name);
75 if (fMethod->DoRegression()){
76 Log() << kFATAL << " ERROR: Sorry, Regression is not yet implement for automatic parameter optimization"
77 << " --> exit" << Endl;
78 }
79
80 Log() << kINFO << "Automatic optimisation of tuning parameters in "
81 << GetMethod()->GetName() << " uses:" << Endl;
82
83 std::map<TString,TMVA::Interval*>::iterator it;
84 for (it=fTuneParameters.begin(); it!=fTuneParameters.end();++it) {
85 Log() << kINFO << it->first
86 << " in range from: " << it->second->GetMin()
87 << " to: " << it->second->GetMax()
88 << " in : " << it->second->GetNbins() << " steps"
89 << Endl;
90 }
91 Log() << kINFO << " using the options: " << fFOMType << " and " << fOptimizationFitType << Endl;
92}
93
94////////////////////////////////////////////////////////////////////////////////
95/// the destructor (delete the OptimizeConfigParameters, store the graph and .. delete it)
96
98{
99 if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();
100 Int_t n=Int_t(fFOMvsIter.size());
101 Float_t *x = new Float_t[n];
102 Float_t *y = new Float_t[n];
103 Float_t ymin=(Float_t)+999999999;
104 Float_t ymax=(Float_t)-999999999;
105
106 for (Int_t i=0;i<n;i++){
107 x[i] = Float_t(i);
108 y[i] = fFOMvsIter[i];
109 if (ymin>y[i]) ymin=y[i];
110 if (ymax<y[i]) ymax=y[i];
111 }
112
113 TH2D *h=new TH2D(TString(GetMethod()->GetName())+"_FOMvsIterFrame","",2,0,n,2,ymin*0.95,ymax*1.05);
114 h->SetXTitle("#iteration "+fOptimizationFitType);
115 h->SetYTitle(fFOMType);
116 TGraph *gFOMvsIter = new TGraph(n,x,y);
117 gFOMvsIter->SetName((TString(GetMethod()->GetName())+"_FOMvsIter").Data());
118 if(!GetMethod()->IsSilentFile()) gFOMvsIter->Write();
119 if(!GetMethod()->IsSilentFile()) h->Write();
120
121 delete [] x;
122 delete [] y;
123 // delete fFOMvsIter;
124}
125
126////////////////////////////////////////////////////////////////////////////////
127
129{
130 if (fOptimizationFitType == "Scan" ) this->optimizeScan();
131 else if (fOptimizationFitType == "FitGA" || fOptimizationFitType == "Minuit" ) this->optimizeFit();
132 else {
133 Log() << kFATAL << "You have chosen as optimization type " << fOptimizationFitType
134 << " that is not (yet) coded --> exit()" << Endl;
135 }
136
137 Log() << kINFO << "For " << GetMethod()->GetName() << " the optimized Parameters are: " << Endl;
138 std::map<TString,Double_t>::iterator it;
139 for(it=fTunedParameters.begin(); it!= fTunedParameters.end(); ++it){
140 Log() << kINFO << it->first << " = " << it->second << Endl;
141 }
142 return fTunedParameters;
143
144}
145
146////////////////////////////////////////////////////////////////////////////////
147/// helper function to scan through the all the combinations in the
148/// parameter space
149
150std::vector< int > TMVA::OptimizeConfigParameters::GetScanIndices( int val, std::vector<int> base){
151 std::vector < int > indices;
152 for (UInt_t i=0; i< base.size(); i++){
153 indices.push_back(val % base[i] );
154 val = int( floor( float(val) / float(base[i]) ) );
155 }
156 return indices;
157}
158
159////////////////////////////////////////////////////////////////////////////////
160/// do the actual optimization using a simple scan method,
161/// i.e. calculate the FOM for
162/// different tuning paraemters and remember which one is
163/// gave the best FOM
164
166{
167
168 Double_t bestFOM=-1000000, currentFOM;
169
170 std::map<TString,Double_t> currentParameters;
171 std::map<TString,TMVA::Interval*>::iterator it;
172
173 // for the scan, start at the lower end of the interval and then "move upwards"
174 // initialize all parameters in currentParameter
175 currentParameters.clear();
176 fTunedParameters.clear();
177
178 for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
179 currentParameters.insert(std::pair<TString,Double_t>(it->first,it->second->GetMin()));
180 fTunedParameters.insert(std::pair<TString,Double_t>(it->first,it->second->GetMin()));
181 }
182 // now loop over all the parameters and get for each combination the figure of merit
183
184 // in order to loop over all the parameters, I first create an "array" (tune parameters)
185 // of arrays (the different values of the tune parameter)
186
187 std::vector< std::vector <Double_t> > v;
188 for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
189 std::vector< Double_t > tmp;
190 for (Int_t k=0; k<it->second->GetNbins(); k++){
191 tmp.push_back(it->second->GetElement(k));
192 }
193 v.push_back(tmp);
194 }
195 Int_t Ntot = 1;
196 std::vector< int > Nindividual;
197 for (UInt_t i=0; i<v.size(); i++) {
198 Ntot *= v[i].size();
199 Nindividual.push_back(v[i].size());
200 }
201 //loop on the total number of different combinations
202
203 for (int i=0; i<Ntot; i++){
204 UInt_t index=0;
205 std::vector<int> indices = GetScanIndices(i, Nindividual );
206 for (it=fTuneParameters.begin(), index=0; index< indices.size(); ++index, ++it){
207 currentParameters[it->first] = v[index][indices[index]];
208 }
209 Log() << kINFO << "--------------------------" << Endl;
210 Log() << kINFO <<"Settings being evaluated:" << Endl;
211 for (std::map<TString,Double_t>::iterator it_print=currentParameters.begin();
212 it_print!=currentParameters.end(); ++it_print){
213 Log() << kINFO << " " << it_print->first << " = " << it_print->second << Endl;
214 }
215
216 GetMethod()->Reset();
217 GetMethod()->SetTuneParameters(currentParameters);
218 // now do the training for the current parameters:
219 if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();
220 if (i==0) GetMethod()->GetTransformationHandler().CalcTransformations(
221 GetMethod()->Data()->GetEventCollection());
223 GetMethod()->Train();
225 currentFOM = GetFOM();
226 Log() << kINFO << "FOM was found : " << currentFOM << "; current best is " << bestFOM << Endl;
227
228 if (currentFOM > bestFOM) {
229 bestFOM = currentFOM;
230 for (std::map<TString,Double_t>::iterator iter=currentParameters.begin();
231 iter != currentParameters.end(); ++iter){
232 fTunedParameters[iter->first]=iter->second;
233 }
234 }
235 }
236
237 GetMethod()->Reset();
238 GetMethod()->SetTuneParameters(fTunedParameters);
239}
240
241////////////////////////////////////////////////////////////////////////////////
242
244{
245 // ranges (intervals) in which the fit varies the parameters
246 std::vector<TMVA::Interval*> ranges; // intervals of the fit ranges
247 std::map<TString, TMVA::Interval*>::iterator it;
248 std::vector<Double_t> pars; // current (starting) fit parameters
249
250 for (it=fTuneParameters.begin(); it != fTuneParameters.end(); ++it){
251 ranges.push_back(new TMVA::Interval(*(it->second)));
252 pars.push_back( (it->second)->GetMean() ); // like this the order is "right". Always keep the
253 // order in the vector "pars" the same as the iterator
254 // iterates through the tuneParameters !!!!
255 }
256
257 // added to allow for transformation on input variables i.e. norm
258 GetMethod()->GetTransformationHandler().CalcTransformations(GetMethod()->Data()->GetEventCollection());
259
260 // create the fitter
261
262 FitterBase* fitter = NULL;
263
264 if ( fOptimizationFitType == "Minuit" ) {
265 TString opt="FitStrategy=0:UseImprove=False:UseMinos=False:Tolerance=100";
266 if (!TMVA::gConfig().IsSilent() ) opt += TString(":PrintLevel=0");
267
268 fitter = new MinuitFitter( *this,
269 "FitterMinuit_BDTOptimize",
270 ranges, opt );
271 }else if ( fOptimizationFitType == "FitGA" ) {
272 TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
273 fitter = new GeneticFitter( *this,
274 "FitterGA_BDTOptimize",
275 ranges, opt );
276 } else {
277 Log() << kWARNING << " you did not specify a valid OptimizationFitType "
278 << " will use the default (FitGA) " << Endl;
279 TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
280 fitter = new GeneticFitter( *this,
281 "FitterGA_BDTOptimize",
282 ranges, opt );
283 }
284
285 fitter->CheckForUnusedOptions();
286
287 // perform the fit
288 fitter->Run(pars);
289
290 // clean up
291 for (UInt_t ipar=0; ipar<ranges.size(); ipar++) delete ranges[ipar];
292
293 GetMethod()->Reset();
294
295 fTunedParameters.clear();
296 Int_t jcount=0;
297 for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
298 fTunedParameters.insert(std::pair<TString,Double_t>(it->first,pars[jcount++]));
299 }
300
301 GetMethod()->SetTuneParameters(fTunedParameters);
302
303}
304
305////////////////////////////////////////////////////////////////////////////////
306/// return the estimator (from current FOM) for the fitting interface
307
309{
310 std::map< std::vector<Double_t> , Double_t>::const_iterator iter;
311 iter = fAlreadyTrainedParCombination.find(pars);
312
313 if (iter != fAlreadyTrainedParCombination.end()) {
314 // std::cout << "I had trained Depth=" <<Int_t(pars[0])
315 // <<" MinEv=" <<Int_t(pars[1])
316 // <<" already --> FOM="<< iter->second <<std::endl;
317 return iter->second;
318 }else{
319 std::map<TString,Double_t> currentParameters;
320 Int_t icount =0; // map "pars" to the map of Tuneparameter, make sure
321 // you never screw up this order!!
322 std::map<TString, TMVA::Interval*>::iterator it;
323 for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
324 currentParameters[it->first] = pars[icount++];
325 }
326 GetMethod()->Reset();
327 GetMethod()->SetTuneParameters(currentParameters);
328 if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();
329
330 if (fNotDoneYet){
331 GetMethod()->GetTransformationHandler().
332 CalcTransformations(GetMethod()->Data()->GetEventCollection());
333 fNotDoneYet=kFALSE;
334 }
336 GetMethod()->Train();
338
339
340 Double_t currentFOM = GetFOM();
341
342 fAlreadyTrainedParCombination.insert(std::make_pair(pars,-currentFOM));
343 return -currentFOM;
344 }
345}
346
347////////////////////////////////////////////////////////////////////////////////
348/// Return the Figure of Merit (FOM) used in the parameter
349/// optimization process
350
352{
353 auto parsePercent = [this](TString input) -> Double_t {
354 // Expects input e.g. SigEffAtBkgEff0 (14 chars) followed by a fraction
355 // either as e.g. 01 or .01 (meaning the same thing 1 %).
356 TString percent = TString(input(14, input.Sizeof()));
357 if (!percent.CountChar('.')) percent.Insert(1,".");
358
359 if (percent.IsFloat()) {
360 return percent.Atof();
361 } else {
362 Log() << kFATAL << " ERROR, " << percent << " in " << fFOMType
363 << " is not a valid floating point number" << Endl;
364 return 0; // Cannot happen
365 }
366 };
367
368 Double_t fom = 0;
369 if (fMethod->DoRegression()){
370 std::cout << " ERROR: Sorry, Regression is not yet implement for automatic parameter optimisation"
371 << " --> exit" << std::endl;
372 std::exit(1);
373 } else {
374 if (fFOMType == "Separation") fom = GetSeparation();
375 else if (fFOMType == "ROCIntegral") fom = GetROCIntegral();
376 else if (fFOMType.BeginsWith("SigEffAtBkgEff0")) fom = GetSigEffAtBkgEff(parsePercent(fFOMType));
377 else if (fFOMType.BeginsWith("BkgRejAtSigEff0")) fom = GetBkgRejAtSigEff(parsePercent(fFOMType));
378 else if (fFOMType.BeginsWith("BkgEffAtSigEff0")) fom = GetBkgEffAtSigEff(parsePercent(fFOMType));
379 else {
380 Log()<< kFATAL << " ERROR, you've specified as Figure of Merit in the "
381 << " parameter optimisation " << fFOMType << " which has not"
382 << " been implemented yet!! ---> exit " << Endl;
383 }
384 }
385
386 fFOMvsIter.push_back(fom);
387 // std::cout << "fom="<<fom<<std::endl; // should write that into a debug log (as option)
388 return fom;
389}
390
391////////////////////////////////////////////////////////////////////////////////
392/// fill the private histograms with the mva distributions for sig/bkg
393
395{
396 if (fMvaSig) fMvaSig->Delete();
397 if (fMvaBkg) fMvaBkg->Delete();
398 if (fMvaSigFineBin) fMvaSigFineBin->Delete();
399 if (fMvaBkgFineBin) fMvaBkgFineBin->Delete();
400
401 // maybe later on this should be done a bit more clever (time consuming) by
402 // first determining proper ranges, removing outliers, as we do in the
403 // MVA output calculation in MethodBase::TestClassifier...
404 // --> then it might be possible also to use the splined PDF's which currently
405 // doesn't seem to work
406
407 fMvaSig = new TH1D("fMvaSig","",100,-1.5,1.5); //used for spline fit
408 fMvaBkg = new TH1D("fMvaBkg","",100,-1.5,1.5); //used for spline fit
409 fMvaSigFineBin = new TH1D("fMvaSigFineBin","",100000,-1.5,1.5);
410 fMvaBkgFineBin = new TH1D("fMvaBkgFineBin","",100000,-1.5,1.5);
411
412 const std::vector< Event*> events=fMethod->Data()->GetEventCollection(Types::kTesting);
413
414 UInt_t signalClassNr = fMethod->DataInfo().GetClassInfo("Signal")->GetNumber();
415
416 // fMethod->GetTransformationHandler().CalcTransformations(fMethod->Data()->GetEventCollection(Types::kTesting));
417
418 for (UInt_t iev=0; iev < events.size() ; iev++){
419 // std::cout << " GetMVADists event " << iev << std::endl;
420 // std::cout << " Class = " << events[iev]->GetClass() << std::endl;
421 // std::cout << " MVA Value = " << fMethod->GetMvaValue(events[iev]) << std::endl;
422 if (events[iev]->GetClass() == signalClassNr) {
423 fMvaSig->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
424 fMvaSigFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
425 } else {
426 fMvaBkg->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
427 fMvaBkgFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
428 }
429 }
430}
431////////////////////////////////////////////////////////////////////////////////
432/// return the separation between the signal and background
433/// MVA ouput distribution
434
436{
437 GetMVADists();
438 if (1){
439 PDF *splS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
440 PDF *splB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );
441 return gTools().GetSeparation(*splS,*splB);
442 }else{
443 std::cout << "Separation calculation via histograms (not PDFs) seems to give still strange results!! Don't do that, check!!"<<std::endl;
444 return gTools().GetSeparation(fMvaSigFineBin,fMvaBkgFineBin); // somehow still gives strange results!!!! Check!!!
445 }
446}
447
448////////////////////////////////////////////////////////////////////////////////
449/// calculate the area (integral) under the ROC curve as a
450/// overall quality measure of the classification
451///
452/// making pdfs out of the MVA-output distributions doesn't work
453/// reliably for cases where the MVA-output isn't a smooth distribution.
454/// this happens "frequently" in BDTs for example when the number of
455/// trees is small resulting in only some discrete possible MVA output values.
456/// (I still leave the code here, but use this with care!!! The default
457/// however is to use the distributions!!!
458
460{
461 GetMVADists();
462
463 Double_t integral = 0;
464 if (0){
465 PDF *pdfS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
466 PDF *pdfB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );
467
468 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
469 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
470
471 UInt_t nsteps = 1000;
472 Double_t step = (xmax-xmin)/Double_t(nsteps);
473 Double_t cut = xmin;
474 for (UInt_t i=0; i<nsteps; i++){
475 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
476 cut+=step;
477 }
478 integral*=step;
479 }else{
480 // sanity checks
481 if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
482 (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
483 std::cout << " Error in OptimizeConfigParameters GetROCIntegral, unequal histograms for sig and bkg.." << std::endl;
484 std::exit(1);
485 }else{
486
487 Double_t *cumulator = fMvaBkgFineBin->GetIntegral();
488 Int_t nbins = fMvaSigFineBin->GetNbinsX();
489 // get the true signal integral (ComputeIntegral just return 1 as they
490 // automatically normalize. IN ADDITION, they do not account for variable
491 // bin sizes (which you might perhaps use later on for the fMvaSig/Bkg histograms)
492 Double_t sigIntegral = 0;
493 for (Int_t ibin=1; ibin<=nbins; ibin++){
494 sigIntegral += fMvaSigFineBin->GetBinContent(ibin) * fMvaSigFineBin->GetBinWidth(ibin);
495 }
496 //gTools().NormHist( fMvaSigFineBin ); // also doesn't use variable bin width. And calls TH1::Scale, which oddly enough does not change the SumOfWeights !!!
497
498 for (Int_t ibin=1; ibin <= nbins; ibin++){ // don't include under- and overflow bin
499 integral += (cumulator[ibin]) * fMvaSigFineBin->GetBinContent(ibin)/sigIntegral * fMvaSigFineBin->GetBinWidth(ibin) ;
500 }
501 }
502 }
503
504 return integral;
505}
506
507////////////////////////////////////////////////////////////////////////////////
508/// calculate the signal efficiency for a given background efficiency
509
511{
512 GetMVADists();
513 Double_t sigEff=0;
514
515 // sanity checks
516 if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
517 (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
518 std::cout << " Error in OptimizeConfigParameters GetSigEffAt, unequal histograms for sig and bkg.." << std::endl;
519 std::exit(1);
520 }else{
521 Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
522 Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
523
524 Int_t nbins=fMvaBkgFineBin->GetNbinsX();
525 Int_t ibin=0;
526
527 // std::cout << " bkgIntegral="<<bkgIntegral
528 // << " sigIntegral="<<sigIntegral
529 // << " bkgCumulator[nbins]="<<bkgCumulator[nbins]
530 // << " sigCumulator[nbins]="<<sigCumulator[nbins]
531 // << std::endl;
532
533 while (bkgCumulator[nbins-ibin] > (1-bkgEff)) {
534 sigEff = sigCumulator[nbins]-sigCumulator[nbins-ibin];
535 ibin++;
536 }
537 }
538 return sigEff;
539}
540
541
542////////////////////////////////////////////////////////////////////////////////
543/// calculate the background efficiency for a given signal efficiency
544///
545/// adapted by marc-olivier.bettler@cern.ch
546
548{
549 GetMVADists();
550 Double_t bkgEff=0;
551
552 // sanity checks
553 if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
554 (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
555 std::cout << " Error in OptimizeConfigParameters GetBkgEffAt, unequal histograms for sig and bkg.." << std::endl;
556 std::exit(1);
557 }else{
558
559 Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
560 Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
561
562 Int_t nbins=fMvaBkgFineBin->GetNbinsX();
563 Int_t ibin=0;
564
565 // std::cout << " bkgIntegral="<<bkgIntegral
566 // << " sigIntegral="<<sigIntegral
567 // << " bkgCumulator[nbins]="<<bkgCumulator[nbins]
568 // << " sigCumulator[nbins]="<<sigCumulator[nbins]
569 // << std::endl;
570
571 while ( sigCumulator[nbins]-sigCumulator[nbins-ibin] < sigEff) {
572 bkgEff = bkgCumulator[nbins]-bkgCumulator[nbins-ibin];
573 ibin++;
574 }
575 }
576 return bkgEff;
577}
578
579////////////////////////////////////////////////////////////////////////////////
580/// calculate the background rejection for a given signal efficiency
581///
582/// adapted by marc-olivier.bettler@cern.ch
583
585{
586 GetMVADists();
587 Double_t bkgRej=0;
588
589 // sanity checks
590 if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
591 (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
592 std::cout << " Error in OptimizeConfigParameters GetBkgEffAt, unequal histograms for sig and bkg.." << std::endl;
593 std::exit(1);
594 }else{
595
596 Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
597 Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
598
599 Int_t nbins=fMvaBkgFineBin->GetNbinsX();
600 Int_t ibin=0;
601
602 // std::cout << " bkgIntegral="<<bkgIntegral
603 // << " sigIntegral="<<sigIntegral
604 // << " bkgCumulator[nbins]="<<bkgCumulator[nbins]
605 // << " sigCumulator[nbins]="<<sigCumulator[nbins]
606 // << std::endl;
607
608 while ( sigCumulator[nbins]-sigCumulator[nbins-ibin] < sigEff) {
609 bkgRej = bkgCumulator[nbins-ibin];
610 ibin++;
611 }
612 }
613 return bkgRej;
614}
#define h(i)
Definition: RSha256.hxx:106
int Int_t
Definition: RtypesCore.h:43
const Bool_t kFALSE
Definition: RtypesCore.h:90
double Double_t
Definition: RtypesCore.h:57
float Float_t
Definition: RtypesCore.h:55
const Bool_t kTRUE
Definition: RtypesCore.h:89
#define ClassImp(name)
Definition: Rtypes.h:361
char name[80]
Definition: TGX11.cxx:109
float xmin
Definition: THbookFile.cxx:93
float ymin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
float ymax
Definition: THbookFile.cxx:93
double floor(double)
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
virtual void SetName(const char *name="")
Set graph name.
Definition: TGraph.cxx:2308
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:614
2-D histogram with a double per channel (see TH1 documentation)}
Definition: TH2.h:292
void CheckForUnusedOptions() const
checks for unused options in option string
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:391
Base class for TMVA fitters.
Definition: FitterBase.h:51
Double_t Run()
estimator function interface for fitting
Definition: FitterBase.cxx:74
Fitter using a Genetic Algorithm.
Definition: GeneticFitter.h:43
The TMVA::Interval Class.
Definition: Interval.h:61
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
const char * GetName() const
Definition: MethodBase.h:333
Bool_t DoRegression() const
Definition: MethodBase.h:438
/Fitter using MINUIT
Definition: MinuitFitter.h:47
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
std::vector< int > GetScanIndices(int val, std::vector< int > base)
helper function to scan through the all the combinations in the parameter space
Double_t GetBkgRejAtSigEff(Double_t sigEff=0.5)
calculate the background rejection for a given signal efficiency
virtual ~OptimizeConfigParameters()
the destructor (delete the OptimizeConfigParameters, store the graph and .. delete it)
Double_t GetBkgEffAtSigEff(Double_t sigEff=0.5)
calculate the background efficiency for a given signal efficiency
void optimizeScan()
do the actual optimization using a simple scan method, i.e.
OptimizeConfigParameters(MethodBase *const method, std::map< TString, TMVA::Interval * > tuneParameters, TString fomType="Separation", TString optimizationType="GA")
Constructor which sets either "Classification or Regression".
std::map< TString, TMVA::Interval * > fTuneParameters
std::map< TString, Double_t > optimize()
Double_t GetSeparation()
return the separation between the signal and background MVA ouput distribution
Double_t GetFOM()
Return the Figure of Merit (FOM) used in the parameter optimization process.
Double_t GetSigEffAtBkgEff(Double_t bkgEff=0.1)
calculate the signal efficiency for a given background efficiency
Double_t GetROCIntegral()
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
void GetMVADists()
fill the private histograms with the mva distributions for sig/bkg
Double_t EstimatorFunction(std::vector< Double_t > &)
return the estimator (from current FOM) for the fitting interface
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Double_t GetXmin() const
Definition: PDF.h:104
Double_t GetXmax() const
Definition: PDF.h:105
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:700
@ kSpline2
Definition: PDF.h:70
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Definition: PDF.cxx:653
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as
Definition: Tools.cxx:132
@ kTesting
Definition: Types.h:145
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:796
Basic string class.
Definition: TString.h:131
TString & Insert(Ssiz_t pos, const char *s)
Definition: TString.h:644
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Definition: TString.cxx:476
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
const Int_t n
Definition: legend1.C:16
RPY_EXPORTED TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
TClass * GetClass(T *)
Definition: TClass.h:658
Config & gConfig()
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
Double_t Log(Double_t x)
Definition: TMath.h:750
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180