Logo ROOT  
Reference Guide
OptimizeConfigParameters.cxx
Go to the documentation of this file.
1 /**********************************************************************************
2  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
3  * Package: TMVA *
4  * Class : OptimizeConfigParameters *
5  * Web : http://tmva.sourceforge.net *
6  * *
7  * Description: The OptimizeConfigParameters takes care of "scanning/fitting" *
8  * different tuning parameters in order to find the best set of *
9  * tuning parameters which will be used in the end *
10  * *
11  * Authors (alphabetical): *
12  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
13  * *
14  * Copyright (c) 2005: *
15  * CERN, Switzerland *
16  * MPI-K Heidelberg, Germany *
17  * *
18  * Redistribution and use in source and binary forms, with or without *
19  * modification, are permitted according to the terms listed in LICENSE *
20  * (http://tmva.sourceforge.net/LICENSE) *
21  **********************************************************************************/
22 
23 /*! \class TMVA::OptimizeConfigParameters
24 \ingroup TMVA
25 
26 */
27 
29 #include "TMVA/Config.h"
30 #include "TMVA/DataSet.h"
31 #include "TMVA/DataSetInfo.h"
32 #include "TMVA/Event.h"
33 #include "TMVA/IFitterTarget.h"
34 #include "TMVA/FitterBase.h"
35 #include "TMVA/GeneticFitter.h"
36 #include "TMVA/IMethod.h"
37 #include "TMVA/Interval.h"
38 #include "TMVA/MethodBase.h"
39 #include "TMVA/MethodFDA.h"
40 #include "TMVA/MsgLogger.h"
41 #include "TMVA/MinuitFitter.h"
42 #include "TMVA/PDF.h"
43 #include "TMVA/Tools.h"
44 #include "TMVA/Types.h"
45 
46 #include "TGraph.h"
47 #include "TH1.h"
48 #include "TH2.h"
49 #include "TMath.h"
50 
51 #include <cstdlib>
52 #include <limits>
53 
54 
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// Constructor which sets either "Classification or Regression"
59 
60 TMVA::OptimizeConfigParameters::OptimizeConfigParameters(MethodBase * const method, std::map<TString,TMVA::Interval*> tuneParameters, TString fomType, TString optimizationFitType)
61 : fMethod(method),
62  fTuneParameters(tuneParameters),
63  fFOMType(fomType),
64  fOptimizationFitType(optimizationFitType),
65  fMvaSig(NULL),
66  fMvaBkg(NULL),
67  fMvaSigFineBin(NULL),
68  fMvaBkgFineBin(NULL),
69  fNotDoneYet(kFALSE)
70 {
71  std::string name = "OptimizeConfigParameters_";
72  name += std::string(GetMethod()->GetName());
73  fLogger = new MsgLogger(name);
74  if (fMethod->DoRegression()){
75  Log() << kFATAL << " ERROR: Sorry, Regression is not yet implement for automatic parameter optimization"
76  << " --> exit" << Endl;
77  }
78 
79  Log() << kINFO << "Automatic optimisation of tuning parameters in "
80  << GetMethod()->GetName() << " uses:" << Endl;
81 
82  std::map<TString,TMVA::Interval*>::iterator it;
83  for (it=fTuneParameters.begin(); it!=fTuneParameters.end();++it) {
84  Log() << kINFO << it->first
85  << " in range from: " << it->second->GetMin()
86  << " to: " << it->second->GetMax()
87  << " in : " << it->second->GetNbins() << " steps"
88  << Endl;
89  }
90  Log() << kINFO << " using the options: " << fFOMType << " and " << fOptimizationFitType << Endl;
91 }
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// the destructor (delete the OptimizeConfigParameters, store the graph and .. delete it)
95 
97 {
98  if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();
99  Int_t n=Int_t(fFOMvsIter.size());
100  Float_t *x = new Float_t[n];
101  Float_t *y = new Float_t[n];
102  Float_t ymin=(Float_t)+999999999;
103  Float_t ymax=(Float_t)-999999999;
104 
105  for (Int_t i=0;i<n;i++){
106  x[i] = Float_t(i);
107  y[i] = fFOMvsIter[i];
108  if (ymin>y[i]) ymin=y[i];
109  if (ymax<y[i]) ymax=y[i];
110  }
111 
112  TH2D *h=new TH2D(TString(GetMethod()->GetName())+"_FOMvsIterFrame","",2,0,n,2,ymin*0.95,ymax*1.05);
113  h->SetXTitle("#iteration "+fOptimizationFitType);
114  h->SetYTitle(fFOMType);
115  TGraph *gFOMvsIter = new TGraph(n,x,y);
116  gFOMvsIter->SetName((TString(GetMethod()->GetName())+"_FOMvsIter").Data());
117  if(!GetMethod()->IsSilentFile()) gFOMvsIter->Write();
118  if(!GetMethod()->IsSilentFile()) h->Write();
119 
120  delete [] x;
121  delete [] y;
122  // delete fFOMvsIter;
123 }
124 
125 ////////////////////////////////////////////////////////////////////////////////
126 
127 std::map<TString,Double_t> TMVA::OptimizeConfigParameters::optimize()
128 {
129  if (fOptimizationFitType == "Scan" ) this->optimizeScan();
130  else if (fOptimizationFitType == "FitGA" || fOptimizationFitType == "Minuit" ) this->optimizeFit();
131  else {
132  Log() << kFATAL << "You have chosen as optimization type " << fOptimizationFitType
133  << " that is not (yet) coded --> exit()" << Endl;
134  }
135 
136  Log() << kINFO << "For " << GetMethod()->GetName() << " the optimized Parameters are: " << Endl;
137  std::map<TString,Double_t>::iterator it;
138  for(it=fTunedParameters.begin(); it!= fTunedParameters.end(); ++it){
139  Log() << kINFO << it->first << " = " << it->second << Endl;
140  }
141  return fTunedParameters;
142 
143 }
144 
145 ////////////////////////////////////////////////////////////////////////////////
146 /// helper function to scan through the all the combinations in the
147 /// parameter space
148 
149 std::vector< int > TMVA::OptimizeConfigParameters::GetScanIndices( int val, std::vector<int> base){
150  std::vector < int > indices;
151  for (UInt_t i=0; i< base.size(); i++){
152  indices.push_back(val % base[i] );
153  val = int( floor( float(val) / float(base[i]) ) );
154  }
155  return indices;
156 }
157 
158 ////////////////////////////////////////////////////////////////////////////////
159 /// do the actual optimization using a simple scan method,
160 /// i.e. calculate the FOM for
161 /// different tuning paraemters and remember which one is
162 /// gave the best FOM
163 
165 {
166 
167  Double_t bestFOM=-1000000, currentFOM;
168 
169  std::map<TString,Double_t> currentParameters;
170  std::map<TString,TMVA::Interval*>::iterator it;
171 
172  // for the scan, start at the lower end of the interval and then "move upwards"
173  // initialize all parameters in currentParameter
174  currentParameters.clear();
175  fTunedParameters.clear();
176 
177  for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
178  currentParameters.insert(std::pair<TString,Double_t>(it->first,it->second->GetMin()));
179  fTunedParameters.insert(std::pair<TString,Double_t>(it->first,it->second->GetMin()));
180  }
181  // now loop over all the parameters and get for each combination the figure of merit
182 
183  // in order to loop over all the parameters, I first create an "array" (tune parameters)
184  // of arrays (the different values of the tune parameter)
185 
186  std::vector< std::vector <Double_t> > v;
187  for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
188  std::vector< Double_t > tmp;
189  for (Int_t k=0; k<it->second->GetNbins(); k++){
190  tmp.push_back(it->second->GetElement(k));
191  }
192  v.push_back(tmp);
193  }
194  Int_t Ntot = 1;
195  std::vector< int > Nindividual;
196  for (UInt_t i=0; i<v.size(); i++) {
197  Ntot *= v[i].size();
198  Nindividual.push_back(v[i].size());
199  }
200  //loop on the total number of different combinations
201 
202  for (int i=0; i<Ntot; i++){
203  UInt_t index=0;
204  std::vector<int> indices = GetScanIndices(i, Nindividual );
205  for (it=fTuneParameters.begin(), index=0; index< indices.size(); ++index, ++it){
206  currentParameters[it->first] = v[index][indices[index]];
207  }
208  Log() << kINFO << "--------------------------" << Endl;
209  Log() << kINFO <<"Settings being evaluated:" << Endl;
210  for (std::map<TString,Double_t>::iterator it_print=currentParameters.begin();
211  it_print!=currentParameters.end(); ++it_print){
212  Log() << kINFO << " " << it_print->first << " = " << it_print->second << Endl;
213  }
214 
215  GetMethod()->Reset();
216  GetMethod()->SetTuneParameters(currentParameters);
217  // now do the training for the current parameters:
218  if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();
219  if (i==0) GetMethod()->GetTransformationHandler().CalcTransformations(
220  GetMethod()->Data()->GetEventCollection());
222  GetMethod()->Train();
224  currentFOM = GetFOM();
225  Log() << kINFO << "FOM was found : " << currentFOM << "; current best is " << bestFOM << Endl;
226 
227  if (currentFOM > bestFOM) {
228  bestFOM = currentFOM;
229  for (std::map<TString,Double_t>::iterator iter=currentParameters.begin();
230  iter != currentParameters.end(); ++iter){
231  fTunedParameters[iter->first]=iter->second;
232  }
233  }
234  }
235 
236  GetMethod()->Reset();
237  GetMethod()->SetTuneParameters(fTunedParameters);
238 }
239 
240 ////////////////////////////////////////////////////////////////////////////////
241 
243 {
244  // ranges (intervals) in which the fit varies the parameters
245  std::vector<TMVA::Interval*> ranges; // intervals of the fit ranges
246  std::map<TString, TMVA::Interval*>::iterator it;
247  std::vector<Double_t> pars; // current (starting) fit parameters
248 
249  for (it=fTuneParameters.begin(); it != fTuneParameters.end(); ++it){
250  ranges.push_back(new TMVA::Interval(*(it->second)));
251  pars.push_back( (it->second)->GetMean() ); // like this the order is "right". Always keep the
252  // order in the vector "pars" the same as the iterator
253  // iterates through the tuneParameters !!!!
254  }
255 
256  // added to allow for transformation on input variables i.e. norm
257  GetMethod()->GetTransformationHandler().CalcTransformations(GetMethod()->Data()->GetEventCollection());
258 
259  // create the fitter
260 
261  FitterBase* fitter = NULL;
262 
263  if ( fOptimizationFitType == "Minuit" ) {
264  TString opt="FitStrategy=0:UseImprove=False:UseMinos=False:Tolerance=100";
265  if (!TMVA::gConfig().IsSilent() ) opt += TString(":PrintLevel=0");
266 
267  fitter = new MinuitFitter( *this,
268  "FitterMinuit_BDTOptimize",
269  ranges, opt );
270  }else if ( fOptimizationFitType == "FitGA" ) {
271  TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
272  fitter = new GeneticFitter( *this,
273  "FitterGA_BDTOptimize",
274  ranges, opt );
275  } else {
276  Log() << kWARNING << " you did not specify a valid OptimizationFitType "
277  << " will use the default (FitGA) " << Endl;
278  TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
279  fitter = new GeneticFitter( *this,
280  "FitterGA_BDTOptimize",
281  ranges, opt );
282  }
283 
284  fitter->CheckForUnusedOptions();
285 
286  // perform the fit
287  fitter->Run(pars);
288 
289  // clean up
290  for (UInt_t ipar=0; ipar<ranges.size(); ipar++) delete ranges[ipar];
291 
292  GetMethod()->Reset();
293 
294  fTunedParameters.clear();
295  Int_t jcount=0;
296  for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
297  fTunedParameters.insert(std::pair<TString,Double_t>(it->first,pars[jcount++]));
298  }
299 
300  GetMethod()->SetTuneParameters(fTunedParameters);
301 
302 }
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 /// return the estimator (from current FOM) for the fitting interface
306 
308 {
309  std::map< std::vector<Double_t> , Double_t>::const_iterator iter;
310  iter = fAlreadyTrainedParCombination.find(pars);
311 
312  if (iter != fAlreadyTrainedParCombination.end()) {
313  // std::cout << "I had trained Depth=" <<Int_t(pars[0])
314  // <<" MinEv=" <<Int_t(pars[1])
315  // <<" already --> FOM="<< iter->second <<std::endl;
316  return iter->second;
317  }else{
318  std::map<TString,Double_t> currentParameters;
319  Int_t icount =0; // map "pars" to the map of Tuneparameter, make sure
320  // you never screw up this order!!
321  std::map<TString, TMVA::Interval*>::iterator it;
322  for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
323  currentParameters[it->first] = pars[icount++];
324  }
325  GetMethod()->Reset();
326  GetMethod()->SetTuneParameters(currentParameters);
327  if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();
328 
329  if (fNotDoneYet){
330  GetMethod()->GetTransformationHandler().
331  CalcTransformations(GetMethod()->Data()->GetEventCollection());
332  fNotDoneYet=kFALSE;
333  }
335  GetMethod()->Train();
337 
338 
339  Double_t currentFOM = GetFOM();
340 
341  fAlreadyTrainedParCombination.insert(std::make_pair(pars,-currentFOM));
342  return -currentFOM;
343  }
344 }
345 
346 ////////////////////////////////////////////////////////////////////////////////
347 /// Return the Figure of Merit (FOM) used in the parameter
348 /// optimization process
349 
351 {
352  auto parsePercent = [this](TString input) -> Double_t {
353  // Expects input e.g. SigEffAtBkgEff0 (14 chars) followed by a fraction
354  // either as e.g. 01 or .01 (meaning the same thing 1 %).
355  TString percent = TString(input(14, input.Sizeof()));
356  if (!percent.CountChar('.')) percent.Insert(1,".");
357 
358  if (percent.IsFloat()) {
359  return percent.Atof();
360  } else {
361  Log() << kFATAL << " ERROR, " << percent << " in " << fFOMType
362  << " is not a valid floating point number" << Endl;
363  return 0; // Cannot happen
364  }
365  };
366 
367  Double_t fom = 0;
368  if (fMethod->DoRegression()){
369  std::cout << " ERROR: Sorry, Regression is not yet implement for automatic parameter optimisation"
370  << " --> exit" << std::endl;
371  std::exit(1);
372  } else {
373  if (fFOMType == "Separation") fom = GetSeparation();
374  else if (fFOMType == "ROCIntegral") fom = GetROCIntegral();
375  else if (fFOMType.BeginsWith("SigEffAtBkgEff0")) fom = GetSigEffAtBkgEff(parsePercent(fFOMType));
376  else if (fFOMType.BeginsWith("BkgRejAtSigEff0")) fom = GetBkgRejAtSigEff(parsePercent(fFOMType));
377  else if (fFOMType.BeginsWith("BkgEffAtSigEff0")) fom = GetBkgEffAtSigEff(parsePercent(fFOMType));
378  else {
379  Log()<< kFATAL << " ERROR, you've specified as Figure of Merit in the "
380  << " parameter optimisation " << fFOMType << " which has not"
381  << " been implemented yet!! ---> exit " << Endl;
382  }
383  }
384 
385  fFOMvsIter.push_back(fom);
386  // std::cout << "fom="<<fom<<std::endl; // should write that into a debug log (as option)
387  return fom;
388 }
389 
390 ////////////////////////////////////////////////////////////////////////////////
391 /// fill the private histograms with the mva distributions for sig/bkg
392 
394 {
395  if (fMvaSig) fMvaSig->Delete();
396  if (fMvaBkg) fMvaBkg->Delete();
397  if (fMvaSigFineBin) fMvaSigFineBin->Delete();
398  if (fMvaBkgFineBin) fMvaBkgFineBin->Delete();
399 
400  // maybe later on this should be done a bit more clever (time consuming) by
401  // first determining proper ranges, removing outliers, as we do in the
402  // MVA output calculation in MethodBase::TestClassifier...
403  // --> then it might be possible also to use the splined PDF's which currently
404  // doesn't seem to work
405 
406  fMvaSig = new TH1D("fMvaSig","",100,-1.5,1.5); //used for spline fit
407  fMvaBkg = new TH1D("fMvaBkg","",100,-1.5,1.5); //used for spline fit
408  fMvaSigFineBin = new TH1D("fMvaSigFineBin","",100000,-1.5,1.5);
409  fMvaBkgFineBin = new TH1D("fMvaBkgFineBin","",100000,-1.5,1.5);
410 
411  const std::vector< Event*> events=fMethod->Data()->GetEventCollection(Types::kTesting);
412 
413  UInt_t signalClassNr = fMethod->DataInfo().GetClassInfo("Signal")->GetNumber();
414 
415  // fMethod->GetTransformationHandler().CalcTransformations(fMethod->Data()->GetEventCollection(Types::kTesting));
416 
417  for (UInt_t iev=0; iev < events.size() ; iev++){
418  // std::cout << " GetMVADists event " << iev << std::endl;
419  // std::cout << " Class = " << events[iev]->GetClass() << std::endl;
420  // std::cout << " MVA Value = " << fMethod->GetMvaValue(events[iev]) << std::endl;
421  if (events[iev]->GetClass() == signalClassNr) {
422  fMvaSig->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
423  fMvaSigFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
424  } else {
425  fMvaBkg->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
426  fMvaBkgFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
427  }
428  }
429 }
430 ////////////////////////////////////////////////////////////////////////////////
431 /// return the separation between the signal and background
432 /// MVA ouput distribution
433 
435 {
436  GetMVADists();
437  if (1){
438  PDF *splS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
439  PDF *splB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );
440  return gTools().GetSeparation(*splS,*splB);
441  }else{
442  std::cout << "Separation calculation via histograms (not PDFs) seems to give still strange results!! Don't do that, check!!"<<std::endl;
443  return gTools().GetSeparation(fMvaSigFineBin,fMvaBkgFineBin); // somehow still gives strange results!!!! Check!!!
444  }
445 }
446 
447 ////////////////////////////////////////////////////////////////////////////////
448 /// calculate the area (integral) under the ROC curve as a
449 /// overall quality measure of the classification
450 ///
451 /// making pdfs out of the MVA-output distributions doesn't work
452 /// reliably for cases where the MVA-output isn't a smooth distribution.
453 /// this happens "frequently" in BDTs for example when the number of
454 /// trees is small resulting in only some discrete possible MVA output values.
455 /// (I still leave the code here, but use this with care!!! The default
456 /// however is to use the distributions!!!
457 
459 {
460  GetMVADists();
461 
462  Double_t integral = 0;
463  if (0){
464  PDF *pdfS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
465  PDF *pdfB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );
466 
467  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
468  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
469 
470  UInt_t nsteps = 1000;
471  Double_t step = (xmax-xmin)/Double_t(nsteps);
472  Double_t cut = xmin;
473  for (UInt_t i=0; i<nsteps; i++){
474  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
475  cut+=step;
476  }
477  integral*=step;
478  }else{
479  // sanity checks
480  if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
481  (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
482  std::cout << " Error in OptimizeConfigParameters GetROCIntegral, unequal histograms for sig and bkg.." << std::endl;
483  std::exit(1);
484  }else{
485 
486  Double_t *cumulator = fMvaBkgFineBin->GetIntegral();
487  Int_t nbins = fMvaSigFineBin->GetNbinsX();
488  // get the true signal integral (ComputeIntegral just return 1 as they
489  // automatically normalize. IN ADDITION, they do not account for variable
490  // bin sizes (which you might perhaps use later on for the fMvaSig/Bkg histograms)
491  Double_t sigIntegral = 0;
492  for (Int_t ibin=1; ibin<=nbins; ibin++){
493  sigIntegral += fMvaSigFineBin->GetBinContent(ibin) * fMvaSigFineBin->GetBinWidth(ibin);
494  }
495  //gTools().NormHist( fMvaSigFineBin ); // also doesn't use variable bin width. And calls TH1::Scale, which oddly enough does not change the SumOfWeights !!!
496 
497  for (Int_t ibin=1; ibin <= nbins; ibin++){ // don't include under- and overflow bin
498  integral += (cumulator[ibin]) * fMvaSigFineBin->GetBinContent(ibin)/sigIntegral * fMvaSigFineBin->GetBinWidth(ibin) ;
499  }
500  }
501  }
502 
503  return integral;
504 }
505 
506 ////////////////////////////////////////////////////////////////////////////////
507 /// calculate the signal efficiency for a given background efficiency
508 
510 {
511  GetMVADists();
512  Double_t sigEff=0;
513 
514  // sanity checks
515  if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
516  (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
517  std::cout << " Error in OptimizeConfigParameters GetSigEffAt, unequal histograms for sig and bkg.." << std::endl;
518  std::exit(1);
519  }else{
520  Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
521  Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
522 
523  Int_t nbins=fMvaBkgFineBin->GetNbinsX();
524  Int_t ibin=0;
525 
526  // std::cout << " bkgIntegral="<<bkgIntegral
527  // << " sigIntegral="<<sigIntegral
528  // << " bkgCumulator[nbins]="<<bkgCumulator[nbins]
529  // << " sigCumulator[nbins]="<<sigCumulator[nbins]
530  // << std::endl;
531 
532  while (bkgCumulator[nbins-ibin] > (1-bkgEff)) {
533  sigEff = sigCumulator[nbins]-sigCumulator[nbins-ibin];
534  ibin++;
535  }
536  }
537  return sigEff;
538 }
539 
540 
541 ////////////////////////////////////////////////////////////////////////////////
542 /// calculate the background efficiency for a given signal efficiency
543 ///
544 /// adapted by marc-olivier.bettler@cern.ch
545 
547 {
548  GetMVADists();
549  Double_t bkgEff=0;
550 
551  // sanity checks
552  if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
553  (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
554  std::cout << " Error in OptimizeConfigParameters GetBkgEffAt, unequal histograms for sig and bkg.." << std::endl;
555  std::exit(1);
556  }else{
557 
558  Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
559  Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
560 
561  Int_t nbins=fMvaBkgFineBin->GetNbinsX();
562  Int_t ibin=0;
563 
564  // std::cout << " bkgIntegral="<<bkgIntegral
565  // << " sigIntegral="<<sigIntegral
566  // << " bkgCumulator[nbins]="<<bkgCumulator[nbins]
567  // << " sigCumulator[nbins]="<<sigCumulator[nbins]
568  // << std::endl;
569 
570  while ( sigCumulator[nbins]-sigCumulator[nbins-ibin] < sigEff) {
571  bkgEff = bkgCumulator[nbins]-bkgCumulator[nbins-ibin];
572  ibin++;
573  }
574  }
575  return bkgEff;
576 }
577 
578 ////////////////////////////////////////////////////////////////////////////////
579 /// calculate the background rejection for a given signal efficiency
580 ///
581 /// adapted by marc-olivier.bettler@cern.ch
582 
584 {
585  GetMVADists();
586  Double_t bkgRej=0;
587 
588  // sanity checks
589  if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
590  (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
591  std::cout << " Error in OptimizeConfigParameters GetBkgEffAt, unequal histograms for sig and bkg.." << std::endl;
592  std::exit(1);
593  }else{
594 
595  Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
596  Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();
597 
598  Int_t nbins=fMvaBkgFineBin->GetNbinsX();
599  Int_t ibin=0;
600 
601  // std::cout << " bkgIntegral="<<bkgIntegral
602  // << " sigIntegral="<<sigIntegral
603  // << " bkgCumulator[nbins]="<<bkgCumulator[nbins]
604  // << " sigCumulator[nbins]="<<sigCumulator[nbins]
605  // << std::endl;
606 
607  while ( sigCumulator[nbins]-sigCumulator[nbins-ibin] < sigEff) {
608  bkgRej = bkgCumulator[nbins-ibin];
609  ibin++;
610  }
611  }
612  return bkgRej;
613 }
n
const Int_t n
Definition: legend1.C:16
TMVA::OptimizeConfigParameters::fOptimizationFitType
TString fOptimizationFitType
Definition: OptimizeConfigParameters.h:88
TMVA::GeneticFitter
Fitter using a Genetic Algorithm.
Definition: GeneticFitter.h:44
TMVA::OptimizeConfigParameters::GetROCIntegral
Double_t GetROCIntegral()
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
Definition: OptimizeConfigParameters.cxx:458
ymax
float ymax
Definition: THbookFile.cxx:95
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:91
TMVA::OptimizeConfigParameters::GetFOM
Double_t GetFOM()
Return the Figure of Merit (FOM) used in the parameter optimization process.
Definition: OptimizeConfigParameters.cxx:350
TMVA::OptimizeConfigParameters::GetScanIndices
std::vector< int > GetScanIndices(int val, std::vector< int > base)
helper function to scan through the all the combinations in the parameter space
Definition: OptimizeConfigParameters.cxx:149
TMVA::OptimizeConfigParameters::optimizeScan
void optimizeScan()
do the actual optimization using a simple scan method, i.e.
Definition: OptimizeConfigParameters.cxx:164
TMath::Max
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
TMVA::OptimizeConfigParameters::GetBkgEffAtSigEff
Double_t GetBkgEffAtSigEff(Double_t sigEff=0.5)
calculate the background efficiency for a given signal efficiency
Definition: OptimizeConfigParameters.cxx:546
floor
double floor(double)
DataSetInfo.h
ClassImp
#define ClassImp(name)
Definition: Rtypes.h:364
TGraph.h
xmax
float xmax
Definition: THbookFile.cxx:95
IMethod.h
TMVA::OptimizeConfigParameters::GetSigEffAtBkgEff
Double_t GetSigEffAtBkgEff(Double_t bkgEff=0.1)
calculate the signal efficiency for a given background efficiency
Definition: OptimizeConfigParameters.cxx:509
TMath::Log
Double_t Log(Double_t x)
Definition: TMath.h:760
TMVA::Types::kTesting
@ kTesting
Definition: Types.h:146
TString::Atof
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
TH1D
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:618
TMVA::OptimizeConfigParameters::GetBkgRejAtSigEff
Double_t GetBkgRejAtSigEff(Double_t sigEff=0.5)
calculate the background rejection for a given signal efficiency
Definition: OptimizeConfigParameters.cxx:583
TMVA::OptimizeConfigParameters::EstimatorFunction
Double_t EstimatorFunction(std::vector< Double_t > &)
return the estimator (from current FOM) for the fitting interface
Definition: OptimizeConfigParameters.cxx:307
Float_t
float Float_t
Definition: RtypesCore.h:57
TMVA::Tools::GetSeparation
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as
Definition: Tools.cxx:133
Int_t
int Int_t
Definition: RtypesCore.h:45
TMVA::OptimizeConfigParameters::GetMVADists
void GetMVADists()
fill the private histograms with the mva distributions for sig/bkg
Definition: OptimizeConfigParameters.cxx:393
TMVA::OptimizeConfigParameters::fFOMType
TString fFOMType
Definition: OptimizeConfigParameters.h:87
x
Double_t x[n]
Definition: legend1.C:17
ROOT::GetClass
TClass * GetClass(T *)
Definition: TClass.h:601
MethodBase.h
TMVA::Event::SetIsTraining
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:391
TString::IsFloat
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
TString
Basic string class.
Definition: TString.h:136
TMVA::OptimizeConfigParameters::fTuneParameters
std::map< TString, TMVA::Interval * > fTuneParameters
Definition: OptimizeConfigParameters.h:84
v
@ v
Definition: rootcling_impl.cxx:3635
TMVA::OptimizeConfigParameters::optimize
std::map< TString, Double_t > optimize()
Definition: OptimizeConfigParameters.cxx:127
TString::CountChar
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Definition: TString.cxx:476
PDF.h
OptimizeConfigParameters.h
TMVA::MinuitFitter
/Fitter using MINUIT
Definition: MinuitFitter.h:48
TMVA::FitterBase::Run
Double_t Run()
estimator function interface for fitting
Definition: FitterBase.cxx:74
MsgLogger.h
TMVA::PDF::GetXmin
Double_t GetXmin() const
Definition: PDF.h:104
TMVA::Configurable::CheckForUnusedOptions
void CheckForUnusedOptions() const
checks for unused options in option string
Definition: Configurable.cxx:270
xmin
float xmin
Definition: THbookFile.cxx:95
h
#define h(i)
Definition: RSha256.hxx:106
TGraph::SetName
virtual void SetName(const char *name="")
Set graph name.
Definition: TGraph.cxx:2313
MinuitFitter.h
TMVA::gConfig
Config & gConfig()
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:92
TMVA::PDF::kSpline2
@ kSpline2
Definition: PDF.h:70
TH2D
2-D histogram with a double per channel (see TH1 documentation)}
Definition: TH2.h:292
GeneticFitter.h
Event.h
MethodFDA.h
TMVA::PDF::GetXmax
Double_t GetXmax() const
Definition: PDF.h:105
TMVA::OptimizeConfigParameters::fLogger
MsgLogger * fLogger
Definition: OptimizeConfigParameters.h:97
TString::Insert
TString & Insert(Ssiz_t pos, const char *s)
Definition: TString.h:649
y
Double_t y[n]
Definition: legend1.C:17
TMVA::MethodBase
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
Types.h
TH2.h
TMVA::OptimizeConfigParameters::GetMethod
MethodBase * GetMethod()
Definition: OptimizeConfigParameters.h:72
TMath::Min
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180
TMVA::Endl
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Config.h
TMVA::OptimizeConfigParameters::OptimizeConfigParameters
OptimizeConfigParameters(MethodBase *const method, std::map< TString, TMVA::Interval * > tuneParameters, TString fomType="Separation", TString optimizationType="GA")
Constructor which sets either "Classification or Regression".
Definition: OptimizeConfigParameters.cxx:60
unsigned int
TMVA::MethodBase::DoRegression
Bool_t DoRegression() const
Definition: MethodBase.h:438
TMVA::OptimizeConfigParameters
Definition: OptimizeConfigParameters.h:49
ymin
float ymin
Definition: THbookFile.cxx:95
TMVA::PDF
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
TMVA::OptimizeConfigParameters::optimizeFit
void optimizeFit()
Definition: OptimizeConfigParameters.cxx:242
Double_t
double Double_t
Definition: RtypesCore.h:59
IFitterTarget.h
TGraph
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
TMVA::MsgLogger
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
Cppyy::GetMethod
RPY_EXPORTED TCppMethod_t GetMethod(TCppScope_t scope, TCppIndex_t imeth)
Definition: clingwrapper.cxx:1386
TMVA::OptimizeConfigParameters::fMethod
MethodBase *const fMethod
Definition: OptimizeConfigParameters.h:82
TMVA::OptimizeConfigParameters::Log
MsgLogger & Log() const
Definition: OptimizeConfigParameters.h:98
TObject::Write
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:795
TMVA::MethodBase::GetName
const char * GetName() const
Definition: MethodBase.h:333
name
char name[80]
Definition: TGX11.cxx:110
Tools.h
TMVA::gTools
Tools & gTools()
TMVA::PDF::GetVal
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:701
TH1.h
DataSet.h
TMVA::PDF::GetIntegral
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Definition: PDF.cxx:654
TMVA::OptimizeConfigParameters::GetSeparation
Double_t GetSeparation()
return the separation between the signal and background MVA ouput distribution
Definition: OptimizeConfigParameters.cxx:434
TMVA::OptimizeConfigParameters::~OptimizeConfigParameters
virtual ~OptimizeConfigParameters()
the destructor (delete the OptimizeConfigParameters, store the graph and .. delete it)
Definition: OptimizeConfigParameters.cxx:96
FitterBase.h
TMath.h
Interval.h
int
TMVA::Interval
The TMVA::Interval Class.
Definition: Interval.h:61
TMVA::FitterBase
Base class for TMVA fitters.
Definition: FitterBase.h:51