Logo ROOT   6.14/05
Reference Guide
MethodBoost.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBoost *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Class for boosting a TMVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 /*! \class TMVA::MethodBoost
33 \ingroup TMVA
34 
35 Class for boosting a TMVA method
36 
37 This class is meant to boost a single classifier. Boosting means
38 training the classifier a few times. Every time the weights of the
39 events are modified according to how well the classifier performed
40 on the test sample.
41 
42 */
43 
44 #include "TMVA/MethodBoost.h"
45 
46 #include "TMVA/ClassifierFactory.h"
47 #include "TMVA/Config.h"
48 #include "TMVA/Configurable.h"
49 #include "TMVA/DataSet.h"
50 #include "TMVA/DataSetInfo.h"
51 #include "TMVA/IMethod.h"
52 #include "TMVA/MethodBase.h"
53 #include "TMVA/MethodCategory.h"
55 #include "TMVA/MethodDT.h"
56 #include "TMVA/MethodFisher.h"
57 #include "TMVA/PDF.h"
58 #include "TMVA/Results.h"
59 #include "TMVA/Timer.h"
60 #include "TMVA/Tools.h"
61 #include "TMVA/Types.h"
62 
63 #include "TMVA/SeparationBase.h"
65 #include "TMVA/GiniIndex.h"
66 #include "TMVA/CrossEntropy.h"
69 
70 #include "Riostream.h"
71 #include "TRandom3.h"
72 #include "TFile.h"
73 #include "TMath.h"
74 #include "TObjString.h"
75 #include "TH1F.h"
76 #include "TH2F.h"
77 #include "TGraph.h"
78 #include "TSpline.h"
79 #include "TDirectory.h"
80 #include "TTree.h"
81 
82 #include <algorithm>
83 #include <iomanip>
84 #include <vector>
85 #include <cmath>
86 
87 
88 REGISTER_METHOD(Boost)
89 
91 
92 ////////////////////////////////////////////////////////////////////////////////
93 
95  const TString& methodTitle,
96  DataSetInfo& theData,
97  const TString& theOption ) :
98  TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption)
99  , fBoostNum(0)
100  , fDetailedMonitoring(kFALSE)
101  , fAdaBoostBeta(0)
102  , fRandomSeed(0)
103  , fBaggedSampleFraction(0)
104  , fBoostedMethodTitle(methodTitle)
105  , fBoostedMethodOptions(theOption)
106  , fMonitorBoostedMethod(kFALSE)
107  , fMonitorTree(0)
108  , fBoostWeight(0)
109  , fMethodError(0)
110  , fROC_training(0.0)
111  , fOverlap_integral(0.0)
112  , fMVAvalues(0)
113 {
114  fMVAvalues = new std::vector<Float_t>;
115  fDataSetManager = NULL;
117 }
118 
119 ////////////////////////////////////////////////////////////////////////////////
120 
122  const TString& theWeightFile)
123  : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
124  , fBoostNum(0)
126  , fAdaBoostBeta(0)
127  , fRandomSeed(0)
129  , fBoostedMethodTitle("")
132  , fMonitorTree(0)
133  , fBoostWeight(0)
134  , fMethodError(0)
135  , fROC_training(0.0)
136  , fOverlap_integral(0.0)
137  , fMVAvalues(0)
138 {
139  fMVAvalues = new std::vector<Float_t>;
140  fDataSetManager = NULL;
142 }
143 
144 ////////////////////////////////////////////////////////////////////////////////
145 /// destructor
146 
148 {
149  fMethodWeight.clear();
150 
151  // the histograms themselves are deleted when the file is closed
152 
153  fTrainSigMVAHist.clear();
154  fTrainBgdMVAHist.clear();
155  fBTrainSigMVAHist.clear();
156  fBTrainBgdMVAHist.clear();
157  fTestSigMVAHist.clear();
158  fTestBgdMVAHist.clear();
159 
160  if (fMVAvalues) {
161  delete fMVAvalues;
162  fMVAvalues = 0;
163  }
164 }
165 
166 
167 ////////////////////////////////////////////////////////////////////////////////
168 /// Boost can handle classification with exactly 2 classes; the regression case below is commented out, so every other request is rejected
169 
171 {
172  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
173  // if (type == Types::kRegression && numberTargets == 1) return kTRUE;
174  return kFALSE;
175 }
176 
177 
178 ////////////////////////////////////////////////////////////////////////////////
179 
181 {
182  DeclareOptionRef( fBoostNum = 1, "Boost_Num",
183  "Number of times the classifier is boosted" );
184 
185  DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
186  "Write monitoring histograms for each boosted classifier" );
187 
188  DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
189  "Produce histograms for detailed boost monitoring" );
190 
191  DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
192  AddPreDefVal(TString("RealAdaBoost"));
193  AddPreDefVal(TString("AdaBoost"));
194  AddPreDefVal(TString("Bagging"));
195 
196  DeclareOptionRef(fBaggedSampleFraction=.6,"Boost_BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
197 
198  DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
199  "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
200 
201  DeclareOptionRef( fTransformString = "step", "Boost_Transform",
202  "Type of transform applied to every boosted method linear, log, step" );
203  AddPreDefVal(TString("step"));
204  AddPreDefVal(TString("linear"));
205  AddPreDefVal(TString("log"));
206  AddPreDefVal(TString("gauss"));
207 
208  DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
209  "Seed for random number generator used for bagging" );
210 
212 }
213 
214 ////////////////////////////////////////////////////////////////////////////////
215 /// Options that are used ONLY by the READER to ensure backward compatibility.
216 /// They therefore have no effect here (the reader only reads the training
217 /// options that HAD been used when the .xml weight file at hand was trained).
218 
220 {
221 
223 
224  DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
225  "How to set the final weight of the boosted classifiers" );
226  AddPreDefVal(TString("ByError"));
227  AddPreDefVal(TString("Average"));
228  AddPreDefVal(TString("ByROC"));
229  AddPreDefVal(TString("ByOverlap"));
230  AddPreDefVal(TString("LastMethod"));
231 
232  DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
233  "Type of transform applied to every boosted method linear, log, step" );
234  AddPreDefVal(TString("step"));
235  AddPreDefVal(TString("linear"));
236  AddPreDefVal(TString("log"));
237  AddPreDefVal(TString("gauss"));
238 
239  // this option here
240  //DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
241  // still exists, but these two possible values
242  AddPreDefVal(TString("HighEdgeGauss"));
243  AddPreDefVal(TString("HighEdgeCoPara"));
244  // have been deleted .. hope that works :)
245 
246  DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
247  "Recalculate the classifier MVA Signallike cut at every boost iteration" );
248 
249 }
250 
251 ////////////////////////////////////////////////////////////////////////////////
252 /// just registering the string from which the boosted classifier will be created
253 
/// \param theMethod   presumably the type of the classifier to boost; no statement
///                    visible in this listing references it (listing line 256 is
///                    absent from this view, so any use there cannot be confirmed)
/// \param methodTitle title of the boosted classifier; stored in fBoostedMethodTitle
/// \param theOption   option string of the boosted classifier; stored unchanged in
///                    fBoostedMethodOptions
/// \return always kTRUE
254 Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
255 {
257  fBoostedMethodTitle = methodTitle;
258  fBoostedMethodOptions = theOption;
 // lower-cased copy of the option string; its only consumer is the disabled
 // "vartransform" check below, so in the visible code it has no effect
259  TString opts=theOption;
260  opts.ToLower();
261  // if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl;
262 
 // no validation happens here, so booking is always reported as successful
263  return kTRUE;
264 }
265 
266 ////////////////////////////////////////////////////////////////////////////////
267 
269 {
270 }
271 
272 ////////////////////////////////////////////////////////////////////////////////
273 /// initialisation routine
274 
276 {
277 
279 
280  results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
281  results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
282  results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
283  if (fDetailedMonitoring){
284  results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
285  results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
286  results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
287  results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
288  results->Store(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
289  }
290 
291 
292  results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
293  results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
294  results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
295  results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
296  results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
297  results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
298  if (fDetailedMonitoring){
299  results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
300  results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
301  results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
302  results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
303  results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
304  results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
305  results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
306  results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
307  results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
308  results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
309  }
310 
311  results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
312  results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
313  results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");
314 
315  results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
316  results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
317  results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");
318 
319 
320 
321  fMonitorTree= new TTree("MonitorBoost","Boost variables");
322  fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
323  fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
324  fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
326 
327 }
328 
329 
330 ////////////////////////////////////////////////////////////////////////////////
331 
333 {
334  Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType << Endl;
335  Log() << kDEBUG << "CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<Endl;
336  Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
337  Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
338  Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum << Endl;
339  Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
340  Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
341  Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
342  Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
343  Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
344  Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
345  Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
346  Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx << Endl;
347  if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
348  Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
349  if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
350  Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
351 
352 }
353 ////////////////////////////////////////////////////////////////////////////////
354 
356 {
357  TDirectory* methodDir( 0 );
358  TString dirName,dirTitle;
359  Int_t StopCounter=0;
361 
362 
363  InitHistos();
364 
365  if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
367 
368  if (fMethods.size() > 0) fMethods.clear();
369  fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
370 
371  Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
372  Timer timer( fBoostNum, GetName() );
373 
375 
376  // clean boosted method options
378 
379 
380  // remove transformations for individual boosting steps
381  // the transformation of the main method will be rerouted to each of the boost steps
382  Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform=");
383  if (varTrafoStart >0) {
384  Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart);
385  if (varTrafoEnd<varTrafoStart)
386  varTrafoEnd=fBoostedMethodOptions.Length();
387  fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
388  }
389 
390  //
391  // training and boosting the classifiers
393  // the first classifier shows the option string output, the rest not
395 
400 
401  // suppressing the rest of the classifier output the right way
402  fCurrentMethod = (dynamic_cast<MethodBase*>(method));
403 
404  if (fCurrentMethod==0) {
405  Log() << kFATAL << "uups.. guess the booking of the " << fCurrentMethodIdx << "-th classifier somehow failed" << Endl;
406  return; // hope that makes coverity happy (as if fears I might use the pointer later on, not knowing that FATAL exits
407  }
408 
409  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
410  if (fCurrentMethod->GetMethodType() == Types::kCategory) { // DSMTEST
411  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(fCurrentMethod)); // DSMTEST
412  if (!methCat) // DSMTEST
413  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl; // DSMTEST
414  methCat->fDataSetManager = fDataSetManager; // DSMTEST
415  } // DSMTEST
416 
417  fCurrentMethod->SetMsgType(kWARNING);
420  // put SetAnalysisType here for the needs of MLP
424 
425 
426  // reroute transformationhandler
428 
429 
430  // creating the directory of the classifier
431  if(!IsSilentFile())
432  {
433  if (fMonitorBoostedMethod) {
434  methodDir=GetFile()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
435  if (methodDir==0) {
436  methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
437  }
438  fCurrentMethod->SetMethodDir(methodDir);
439  fCurrentMethod->BaseDir()->cd();
440  }
441  }
442 
443  // training
444  TMVA::MethodCompositeBase::fMethods.push_back(method);
448  TMVA::MsgLogger::InhibitOutput(); //suppressing Logger outside the method
449  if (fBoostType=="Bagging") Bagging(); // you want also to train the first classifier on a bagged sample
450  SingleTrain();
453 
454  // calculate MVA values of current method for all events in training sample
455  // (used later on to get 'misclassified events' etc. for the boosting)
456  CalcMVAValues();
457 
459 
460  // get ROC integral and overlap integral for single method on
461  // training sample if fMethodWeightType == "ByROC" or the user
462  // wants detailed monitoring
463 
464  // boosting (reweight training sample)
467 
469  results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1,fBoostWeight);
470  results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1,fMethodError);
471 
472  if (fDetailedMonitoring) {
475  results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
476  results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
477  results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
478  results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
479  }
480 
481 
482 
483  fMonitorTree->Fill();
484 
485  // stop boosting if needed when error has reached 0.5
486  // thought of counting a few steps, but it doesn't seem to be necessary
487  Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
488  if (fMethodError > 0.49999) StopCounter++;
489  if (StopCounter > 0 && fBoostType != "Bagging") {
490  timer.DrawProgressBar( fBoostNum );
491  fBoostNum = fCurrentMethodIdx+1;
492  Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
493  if (fBoostNum < 5)
494  Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fAdaBoostBeta << ", try reducing it." <<Endl;
495  break;
496  }
497  }
498 
499  // As MethodBoost does not act on a private event sample (as MethodBDT does), we must
500  // not leave "boosted" events behind for the next classifier in the factory
501 
503 
504  Timer* timer1= new Timer( fBoostNum, GetName() );
505  // normalizing the weights of the classifiers
507  // performing post-boosting actions
508 
510 
511  if (fCurrentMethodIdx==fBoostNum) {
512  Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
513  << " " << Endl;
514  }
515 
516  TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
518 
519  }
520 
521  // Ensure that in case of only 1 boost the method weight equals
522  // 1.0. This avoids unexpected behaviour in case of very bad
523  // classifiers which have fBoostWeight=1 or fMethodError=0.5,
524  // because their weight would be set to zero. This behaviour is
525  // not ok if one boosts just one time.
526  if (fMethods.size()==1) fMethodWeight[0] = 1.0;
527 
529 
530  delete timer1;
531 }
532 
533 ////////////////////////////////////////////////////////////////////////////////
534 
536 {
538 }
539 
540 ////////////////////////////////////////////////////////////////////////////////
541 
543 {
544  if (fBoostNum <=0) Log() << kFATAL << "CreateHistograms called before fBoostNum is initialized" << Endl;
545  // calculating histograms boundaries and creating histograms..
546  // nrms = number of rms around the average to use for outline (of the 0 classifier)
547  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
548  Int_t signalClass = 0;
549  if (DataInfo().GetClassInfo("Signal") != 0) {
550  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
551  }
553  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
554 
556  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
557  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;
558 
559  // creating all the histograms
560  for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
561  fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
562  fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
563  fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
564  fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
565  fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i", imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
566  fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i", imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
567  }
568 }
569 
570 ////////////////////////////////////////////////////////////////////////////////
571 /// resetting back the boosted weights of the events to 1
572 
574 {
575  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
576  const Event *ev = Data()->GetEvent(ievt);
577  ev->SetBoostWeight( 1.0 );
578  }
579 }
580 
581 ////////////////////////////////////////////////////////////////////////////////
582 
584 {
585  TDirectory* dir=0;
586  if (fMonitorBoostedMethod) {
587  for (UInt_t imtd=0;imtd<fBoostNum;imtd++) {
588 
589  //writing the histograms in the specific classifier's directory
590  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
591  if (!m) continue;
592  dir = m->BaseDir();
593  dir->cd();
594  fTrainSigMVAHist[imtd]->SetDirectory(dir);
595  fTrainSigMVAHist[imtd]->Write();
596  fTrainBgdMVAHist[imtd]->SetDirectory(dir);
597  fTrainBgdMVAHist[imtd]->Write();
598  fBTrainSigMVAHist[imtd]->SetDirectory(dir);
599  fBTrainSigMVAHist[imtd]->Write();
600  fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
601  fBTrainBgdMVAHist[imtd]->Write();
602  }
603  }
604 
605  // going back to the original folder
606  BaseDir()->cd();
607 
608  fMonitorTree->Write();
609 }
610 
611 ////////////////////////////////////////////////////////////////////////////////
612 
614 {
616  if (fMonitorBoostedMethod) {
617  UInt_t nloop = fTestSigMVAHist.size();
618  if (fMethods.size()<nloop) nloop = fMethods.size();
619  //running over all the events and populating the test MVA histograms
621  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
622  const Event* ev = GetEvent(ievt);
623  Float_t w = ev->GetWeight();
624  if (DataInfo().IsSignal(ev)) {
625  for (UInt_t imtd=0; imtd<nloop; imtd++) {
626  fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
627  }
628  }
629  else {
630  for (UInt_t imtd=0; imtd<nloop; imtd++) {
631  fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
632  }
633  }
634  }
636  }
637 }
638 
639 ////////////////////////////////////////////////////////////////////////////////
640 
642 {
644  if (treetype==Types::kTraining) return;
645  UInt_t nloop = fTestSigMVAHist.size();
646  if (fMethods.size()<nloop) nloop = fMethods.size();
647  if (fMonitorBoostedMethod) {
648  TDirectory* dir=0;
649  for (UInt_t imtd=0;imtd<nloop;imtd++) {
650  //writing the histograms in the specific classifier's directory
651  MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
652  if (!mva) continue;
653  dir = mva->BaseDir();
654  if (dir==0) continue;
655  dir->cd();
656  fTestSigMVAHist[imtd]->SetDirectory(dir);
657  fTestSigMVAHist[imtd]->Write();
658  fTestBgdMVAHist[imtd]->SetDirectory(dir);
659  fTestBgdMVAHist[imtd]->Write();
660  }
661  }
662 }
663 
664 ////////////////////////////////////////////////////////////////////////////////
665 /// process user options
666 
668 {
669 }
670 
671 ////////////////////////////////////////////////////////////////////////////////
672 /// initialization
673 
675 {
677  MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
678  if (meth){
679  meth->SetSilentFile(IsSilentFile());
680  if(IsModelPersistence()){
681  TString _fFileDir= DataInfo().GetName();
682  _fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
683  meth->SetWeightFileDir(_fFileDir);
684  }
686  meth->TrainMethod();
687  }
688 }
689 
690 ////////////////////////////////////////////////////////////////////////////////
691 /// find the CUT on the individual MVA that defines an event as
692 /// correct or misclassified (to be used in the boosting process)
693 
695 {
696  if (!method || method->GetMethodType() == Types::kDT ){ return;}
697 
698  // creating finely binned histograms containing the error rate
699  const Int_t nBins=10001;
700  Double_t minMVA=150000;
701  Double_t maxMVA=-150000;
702  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
703  GetEvent(ievt);
704  Double_t val=method->GetMvaValue();
705  //Helge .. I think one could very well use fMVAValues for that ... -->to do
706  if (val>maxMVA) maxMVA=val;
707  if (val<minMVA) minMVA=val;
708  }
709  maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
710 
711  Double_t sum = 0.;
712 
713  TH1D *mvaS = new TH1D(Form("MVAS_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
714  TH1D *mvaB = new TH1D(Form("MVAB_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
715  TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
716  TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
717 
718 
720  if (fDetailedMonitoring){
721  results->Store(mvaS, Form("MVAS_%d",fCurrentMethodIdx));
722  results->Store(mvaB, Form("MVAB_%d",fCurrentMethodIdx));
723  results->Store(mvaSC,Form("MVASC_%d",fCurrentMethodIdx));
724  results->Store(mvaBC,Form("MVABC_%d",fCurrentMethodIdx));
725  }
726 
727  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
728 
729  Double_t weight = GetEvent(ievt)->GetWeight();
730  Double_t mvaVal=method->GetMvaValue();
731  sum +=weight;
732  if (DataInfo().IsSignal(GetEvent(ievt))){
733  mvaS->Fill(mvaVal,weight);
734  }else {
735  mvaB->Fill(mvaVal,weight);
736  }
737  }
738  SeparationBase *sepGain;
739 
740 
741  // Boosting should use Misclassification not Gini Index (changed, Helge 31.5.2013)
742  // WARNING! It works with Misclassification only if you fix the signal to
743  // background at every step. Strangely enough, there are better results
744  // ( as seen with BDT ) if you use Gini Index, and accept that sometimes no
745  // sensible cut is found - i.e. the cut is then outside the MVA value range,
746  // all events are classified as background and then according to the Boost
747  // algorithm something is renormed 'automatically' ... so that in the next
748  // step again the result is something sensible.
749  // Strange ... that THIS is supposed to be right?
750 
751  // SeparationBase *sepGain2 = new MisClassificationError();
752  //sepGain = new MisClassificationError();
753  sepGain = new GiniIndex();
754  //sepGain = new CrossEntropy();
755 
756  Double_t sTot = mvaS->GetSum();
757  Double_t bTot = mvaB->GetSum();
758 
759  mvaSC->SetBinContent(1,mvaS->GetBinContent(1));
760  mvaBC->SetBinContent(1,mvaB->GetBinContent(1));
761  Double_t sSel=0;
762  Double_t bSel=0;
763  Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
764  Double_t mvaCut=mvaSC->GetBinLowEdge(1);
765  Double_t sSelCut=sSel;
766  Double_t bSelCut=bSel;
767  // std::cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << std::endl;
768 
769  // for (Int_t ibin=1;ibin<=nBins;ibin++) std::cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << std::endl;
770  Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal
771  for (Int_t ibin=1;ibin<=nBins;ibin++){
772  mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
773  mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));
774 
775  sSel=mvaSC->GetBinContent(ibin);
776  bSel=mvaBC->GetBinContent(ibin);
777 
778  // if (ibin==nBins){
779  // std::cout << "Last bin s="<< sSel <<" b="<<bSel << " s="<< sTot-sSel <<" b="<<bTot-bSel << endl;
780  // }
781 
782  if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
783  // && (mvaSC->GetBinCenter(ibin) >0 || (fCurrentMethodIdx+1)%2 )
784  ){
785  separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
786  // mvaCut=mvaSC->GetBinCenter(ibin);
787  mvaCut=mvaSC->GetBinLowEdge(ibin+1);
788  // if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1;
789  if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
790  else mvaCutOrientation=1;
791  sSelCut=sSel;
792  bSelCut=bSel;
793  // std::cout << "new cut at " << mvaCut << "with s="<<sTot-sSel << " b="<<bTot-bSel << std::endl;
794  }
795  /*
796  Double_t ori;
797  if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) ori=-1;
798  else ori=1;
799  std::cout << ibin << " mvacut="<<mvaCut
800  << " sTot=" << sTot
801  << " bTot=" << bTot
802  << " sSel=" << sSel
803  << " bSel=" << bSel
804  << " s/b(1)=" << sSel/bSel
805  << " s/b(2)=" << (sTot-sSel)/(bTot-bSel)
806  << " sepGain="<<sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
807  << " sepGain2="<<sepGain2->GetSeparationGain(sSel,bSel,sTot,bTot)
808  << " " <<ori
809  << std::endl;
810  */
811 
812  }
813 
814  if (0){
815  double parentIndex=sepGain->GetSeparationIndex(sTot,bTot);
816  double leftIndex =sepGain->GetSeparationIndex(sSelCut,bSelCut);
817  double rightIndex =sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
818  std::cout
819  << " sTot=" << sTot
820  << " bTot=" << bTot
821  << " s="<<sSelCut
822  << " b="<<bSelCut
823  << " s2="<<(sTot-sSelCut)
824  << " b2="<<(bTot-bSelCut)
825  << " s/b(1)=" << sSelCut/bSelCut
826  << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
827  << " index before cut=" << parentIndex
828  << " after: left=" << leftIndex
829  << " after: right=" << rightIndex
830  << " sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
831  << " sepGain="<<separationGain
832  << " sepGain="<<sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
833  << " cut=" << mvaCut
834  << " idx="<<fCurrentMethodIdx
835  << " cutOrientation="<<mvaCutOrientation
836  << std::endl;
837  }
838  method->SetSignalReferenceCut(mvaCut);
839  method->SetSignalReferenceCutOrientation(mvaCutOrientation);
840 
841  results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1,separationGain);
842 
843 
844  Log() << kDEBUG << "(old step) Setting method cut to " <<method->GetSignalReferenceCut()<< Endl;
845 
846  if(IsSilentFile())
847  {
848  mvaS ->Delete();
849  mvaB ->Delete();
850  mvaSC->Delete();
851  mvaBC->Delete();
852  }
853 }
854 
855 ////////////////////////////////////////////////////////////////////////////////
/// Run one boosting step on `method`, dispatching on fBoostType:
/// "AdaBoost" calls AdaBoost(method,1), "RealAdaBoost" calls AdaBoost(method,0)
/// and "Bagging" calls Bagging(). Any other value is reported at kFATAL level.
/// The value returned by the selected algorithm is appended to fMethodWeight
/// and returned to the caller.
856 
858 {
859  Double_t returnVal=-1; // remains -1 only if none of the branches below assigns it
860 
861 
862  if (fBoostType=="AdaBoost") returnVal = this->AdaBoost (method,1);
863  else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost (method,0);
864  else if (fBoostType=="Bagging") returnVal = this->Bagging ();
865  else{
866  Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
867  }
 // the returned boost weight doubles as the weight of this classifier in the committee
868  fMethodWeight.push_back(returnVal);
869  return returnVal;
870 }
871 ////////////////////////////////////////////////////////////////////////////////
872 /// the standard (discrete or real) AdaBoost algorithm
///
/// - method           - classifier whose stored responses (fMVAvalues) are used;
///                      if null, a warning is logged and 0 is returned
/// - discreteAdaBoost - if true, events are flagged right/wrong using the cut
///                      found by FindMVACut(); otherwise a per-event signal
///                      probability from QuickMVAProbEstimator is used
///
/// Side effects visible in this function: sets fMethodError and fBoostWeight,
/// rescales the boost weight of the events and fills the "SoverBtotal" histogram.
/// Returns the boost weight (0 when fMethodError is exactly 0).
873 
875 {
876  if (!method) {
877  Log() << kWARNING << " AdaBoost called without classifier reference - needed for calculating AdaBoost " << Endl;
878  return 0;
879  }
880 
881  Float_t w,v; Bool_t sig=kTRUE;
882  Double_t sumAll=0, sumWrong=0;
 // one misclassification flag per event; released with delete[] at the end of the function
883  Bool_t* WrongDetection=new Bool_t[GetNEvents()];
 // only allocated for the real AdaBoost variant; deleted at the end of the function
884  QuickMVAProbEstimator *MVAProb=NULL;
885 
886  if (discreteAdaBoost) {
887  FindMVACut(method);
888  Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
889  } else {
890  MVAProb=new TMVA::QuickMVAProbEstimator();
891  // the RealAdaBoost does not use a simple "yes (signal)" or "no (background)"
892  // answer from your single MVA, but a "signal probability" instead (in the BDT case,
893  // that would be the 'purity' in the leaf node. For some MLP parameter, the MVA output
894  // can also be interpreted as a probability, but here I try a general approach to get this
895  // probability from the MVA distributions...
896 
897  for (Long64_t evt=0; evt<GetNEvents(); evt++) {
898  const Event* ev = Data()->GetEvent(evt);
899  MVAProb->AddEvent(fMVAvalues->at(evt),ev->GetWeight(),ev->GetClass());
900  }
901  }
902 
903 
 // start from "every event is wrong"; the discrete branch below overwrites the flag per event
904  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;
905 
906  // finding the wrong events and calculating their total weights
907  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
908  const Event* ev = GetEvent(ievt);
909  sig=DataInfo().IsSignal(ev);
910  v = fMVAvalues->at(ievt);
911  w = ev->GetWeight();
912  sumAll += w;
913  if(!IsSilentFile())
914  {
915  if (fMonitorBoostedMethod) {
916  if (sig) {
919  }
920  else {
923  }
924  }
925  }
926 
927  if (discreteAdaBoost){
 // correct when the true class agrees with the classifier's signal-like decision
928  if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
929  WrongDetection[ievt]=kFALSE;
930  }else{
931  WrongDetection[ievt]=kTRUE;
932  sumWrong+=w;
933  }
934  }else{
935  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
 // linear rescale: 0.5 -> 0, 1 -> +1, 0 -> -1
936  mvaProb = 2*(mvaProb-0.5);
937  Int_t trueType;
938  if (DataInfo().IsSignal(ev)) trueType = 1;
939  else trueType = -1;
 // real variant: sumWrong accumulates weight * true label (+1/-1) * rescaled probability
940  sumWrong+= w*trueType*mvaProb;
941  }
942  }
943 
 // NOTE(review): no guard against sumAll == 0 before this division - confirm callers exclude it
944  fMethodError=sumWrong/sumAll;
945 
946  // calculating the fMethodError and the boostWeight out of it uses the formula
947  // w = ((1-err)/err)^beta
948 
949  Double_t boostWeight=0;
950 
951  if (fMethodError == 0) { //no misclassification made.. perfect, no boost ;)
952  Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
953  }else{
954 
955  if (discreteAdaBoost)
957  else
 // real variant: boostWeight = fAdaBoostBeta * Log((1+err)/(1-err))
958  boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
959 
960 
961  // std::cout << "boostweight = " << boostWeight << std::endl;
962 
963  // ADA boosting, rescaling the weight of the wrong events according to the error level
964  // over the entire test sample rescaling all the weights to have the same sum, but without
965  // touching the original weights (changing only the boosted weight of all the events)
966  // first reweight
967 
968  Double_t newSum=0., oldSum=0.;
969 
970 
971  Double_t boostfactor = TMath::Exp(boostWeight);
972 
973 
974  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
975  const Event* ev = Data()->GetEvent(ievt);
976  oldSum += ev->GetWeight();
977  if (discreteAdaBoost){
978  // events are classified as Signal OR background .. right or wrong
979  if (WrongDetection[ievt] && boostWeight != 0) {
 // positive-weight events are scaled up by boostfactor, all others by its inverse
980  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
981  else ev->ScaleBoostWeight(1./boostfactor);
982  }
983  // if (ievt<30) std::cout<<ievt<<" var0="<<ev->GetValue(0)<<" var1="<<ev->GetValue(1)<<" weight="<<ev->GetWeight() << " boostby:"<<boostfactor<<std::endl;
984 
985  }else{
986  // events are classified by their probability of being signal or background
987  // (eventually you should write this one - i.e. re-use the MVA value that were already
988  // calculated and stored.. however ,for the moment ..
989  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
990  mvaProb = 2*(mvaProb-0.5);
991  // mvaProb = (1-mvaProb);
992 
993  Int_t trueType=1;
994  if (DataInfo().IsSignal(ev)) trueType = 1;
995  else trueType = -1;
996 
 // per-event factor: overwrites the function-level boostfactor computed above
997  boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
998  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
999  else ev->ScaleBoostWeight(1./boostfactor);
1000 
1001  }
1002  newSum += ev->GetWeight();
1003  }
1004 
 // factor that brings the summed weight back to its value before the reweighting
1005  Double_t normWeight = oldSum/newSum;
1006  // next normalize the weights
1007  Double_t normSig=0, normBkg=0;
1008  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1009  const Event* ev = Data()->GetEvent(ievt);
1010  ev->ScaleBoostWeight(normWeight);
 // NOTE(review): a non-zero GetClass() is counted as "Sig" here, while the loops above
 // use DataInfo().IsSignal(ev) - confirm both conventions agree
1011  if (ev->GetClass()) normSig+=ev->GetWeight();
1012  else normBkg+=ev->GetWeight();
1013  }
1014 
1016  results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);
1017 
 // scale each of the two groups by oldSum/(2*group sum), aiming at equal summed weight per group
1018  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1019  const Event* ev = Data()->GetEvent(ievt);
1020 
1021  if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
1022  else ev->ScaleBoostWeight(oldSum/normBkg/2);
1023  }
1024  }
1025 
1026  delete[] WrongDetection;
1027  if (MVAProb) delete MVAProb;
1028 
1029  fBoostWeight = boostWeight; // used ONLY for the monitoring tree
1030 
1031  return boostWeight;
1032 }
1033 
1034 
1035 ////////////////////////////////////////////////////////////////////////////////
1036 /// Bagging or Bootstrap boosting, gives new random poisson weight for every event
///
/// Sets fBoostWeight to 1 and always returns 1.
1037 
1039 {
 // seed is fRandomSeed plus the current number of entries in fMethods
 // NOTE(review): trandom is allocated with new and no matching delete is visible
 // in this function - possible leak, confirm
1040  TRandom3 *trandom = new TRandom3(fRandomSeed+fMethods.size());
1041  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1042  const Event* ev = Data()->GetEvent(ievt);
1044  }
1045  fBoostWeight = 1; // used ONLY for the monitoring tree
1046  return 1.;
1047 }
1048 
1049 
1050 ////////////////////////////////////////////////////////////////////////////////
1051 /// Get help message text
1052 ///
1053 /// typical length of text line:
1054 /// "|--------------------------------------------------------------|"
///
/// Writes a fixed, multi-line description of the boost method and of its main
/// configuration options to Log(); it has no other effect.
/// NOTE(review): the text below advertises the transforms "step, log, and minmax",
/// whereas GetMvaValue() in this file accepts "linear", "log", "step" and "gauss" -
/// confirm which list is current.
1055 
1057 {
1058  Log() << Endl;
1059  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1060  Log() << Endl;
1061  Log() << "This method combines several classifier of one species in a "<<Endl;
1062  Log() << "single multivariate quantity via the boost algorithm." << Endl;
1063  Log() << "the output is a weighted sum over all individual classifiers" <<Endl;
1064  Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
1065  Log() << "events that were misclassified in the previous tree a larger " << Endl;
1066  Log() << "weight in the training of the following classifier."<<Endl;
1067  Log() << "Optionally, Bagged boosting can also be applied." << Endl;
1068  Log() << Endl;
1069  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1070  Log() << Endl;
1071  Log() << "The most important parameter in the configuration is the "<<Endl;
1072  Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
1073  Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
1074  Log() << "AdaBoosting: The most important parameters in this configuration" <<Endl;
1075  Log() << "is the beta parameter (Boost_AdaBoostBeta) " << Endl;
1076  Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
1077  Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
1078  Log() << "are available: step, log, and minmax, the default is no transform."<<Endl;
1079  Log() <<Endl;
1080  Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
1081  Log() << "their performance by boosting them, some even slightly deteriorate"<< Endl;
1082  Log() << "due to the boosting." <<Endl;
1083  Log() << "The booking of the boost method is special since it requires"<<Endl;
1084  Log() << "the booing of the method to be boosted and the boost itself."<<Endl;
1085  Log() << "This is solved by booking the method to be boosted and to add"<<Endl;
1086  Log() << "all Boost parameters, which all begin with \"Boost_\" to the"<<Endl;
1087  Log() << "options string. The factory separates the options and initiates"<<Endl;
1088  Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
1089  Log() << "macro \"Boost.C\"" <<Endl;
1090 }
1091 
1092 ////////////////////////////////////////////////////////////////////////////////
/// No ranking is computed by this class: the function unconditionally returns 0.
1093 
1095 {
1096  return 0;
1097 }
1098 
1099 ////////////////////////////////////////////////////////////////////////////////
1100 /// return boosted MVA response
1101 
1103 {
1104  Double_t mvaValue = 0;
1105  Double_t norm = 0;
1106  Double_t epsilon = TMath::Exp(-1.);
1107  //Double_t fact = TMath::Exp(-1.)+TMath::Exp(1.);
1108  for (UInt_t i=0;i< fMethods.size(); i++){
1109  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
1110  if (m==0) continue;
1111  Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
1112  Double_t sigcut = m->GetSignalReferenceCut();
1113 
1114  // default is no transform
1115  if (fTransformString == "linear"){
1116 
1117  }
1118  else if (fTransformString == "log"){
1119  if (val < sigcut) val = sigcut;
1120 
1121  val = TMath::Log((val-sigcut)+epsilon);
1122  }
1123  else if (fTransformString == "step" ){
1124  if (m->IsSignalLike(val)) val = 1.;
1125  else val = -1.;
1126  }
1127  else if (fTransformString == "gauss"){
1128  val = TMath::Gaus((val-sigcut),1);
1129  }
1130  else {
1131  Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
1132  }
1133  mvaValue+=val*fMethodWeight[i];
1134  norm +=fMethodWeight[i];
1135  // std::cout << "mva("<<i<<") = "<<val<<" " << valx<< " " << mvaValue<<" and sigcut="<<sigcut << std::endl;
1136  }
1137  mvaValue/=norm;
1138  // cannot determine error
1139  NoErrorCalc(err, errUpper);
1140 
1141  return mvaValue;
1142 }
1143 
1144 ////////////////////////////////////////////////////////////////////////////////
1145 /// Calculate the ROC integral of a single classifier or even the
1146 /// whole boosted classifier. The tree type (training or testing
1147 /// sample) is specified by 'eTT'.
1148 ///
1149 /// If tree type kTraining is set, the original training sample is
1150 /// used to compute the ROC integral (original weights).
1151 ///
1152 /// - singleMethod - if kTRUE, return ROC integral of single (last
1153 /// trained) classifier; if kFALSE, return ROC
1154 /// integral of full classifier
1155 ///
1156 /// - eTT - tree type (Types::kTraining / Types::kTesting)
1157 ///
1158 /// - CalcOverlapIntergral - if kTRUE, the overlap integral of the
1159 /// signal/background MVA distributions
1160 /// is calculated and stored in
1161 /// 'fOverlap_integral'
1162 
1164 {
1165  // set data sample training / testing
1166  Data()->SetCurrentType(eTT);
1167 
1168  MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no protection against a zero-pointer delivered by dynamic_cast
1169  // to make CoVerity happy (although, OF COURSE, the last method in the committee
1170  // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
1171  // will never by "zero" ...
1172  if (singleMethod && !method) {
1173  Log() << kFATAL << " What do you do? Your method:"
1174  << fMethods.back()->GetName()
1175  << " seems not to be a propper TMVA method"
1176  << Endl;
1177  std::exit(1);
1178  }
1179  Double_t err = 0.0;
1180 
1181  // temporary renormalize the method weights in case of evaluation
1182  // of full classifier.
1183  // save the old normalization of the methods
1184  std::vector<Double_t> OldMethodWeight(fMethodWeight);
1185  if (!singleMethod) {
1186  // calculate sum of weights of all methods
1187  Double_t AllMethodsWeight = 0;
1188  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1189  AllMethodsWeight += fMethodWeight.at(i);
1190  // normalize the weights of the classifiers
1191  if (AllMethodsWeight != 0.0) {
1192  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1193  fMethodWeight[i] /= AllMethodsWeight;
1194  }
1195  }
1196 
1197  // calculate MVA values
1198  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
1199  std::vector <Float_t>* mvaRes;
1200  if (singleMethod && eTT==Types::kTraining)
1201  mvaRes = fMVAvalues; // values already calculated
1202  else {
1203  mvaRes = new std::vector <Float_t>(GetNEvents());
1204  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1205  GetEvent(ievt);
1206  (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
1207  }
1208  }
1209 
1210  // restore the method weights
1211  if (!singleMethod)
1212  fMethodWeight = OldMethodWeight;
1213 
1214  // now create histograms for calculation of the ROC integral
1215  Int_t signalClass = 0;
1216  if (DataInfo().GetClassInfo("Signal") != 0) {
1217  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1218  }
1219  gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
1220  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1221 
1223  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
1224  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;
1225 
1226  // calculate ROC integral
1227  TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
1228  TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
1229  TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1230  if (CalcOverlapIntergral) {
1231  mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
1232  mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
1233  }
1234  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1235  const Event* ev = GetEvent(ievt);
1236  Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
1237  if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
1238  else mva_b->Fill( (*mvaRes)[ievt], w );
1239 
1240  if (CalcOverlapIntergral) {
1241  Float_t w_ov = ev->GetWeight();
1242  if (DataInfo().IsSignal(ev))
1243  mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
1244  else
1245  mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1246  }
1247  }
1248  gTools().NormHist( mva_s );
1249  gTools().NormHist( mva_b );
1250  PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
1251  PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );
1252 
1253  // calculate ROC integral from fS, fB
1254  Double_t ROC = MethodBase::GetROCIntegral(fS, fB);
1255 
1256  // calculate overlap integral
1257  if (CalcOverlapIntergral) {
1258  gTools().NormHist( mva_s_overlap );
1259  gTools().NormHist( mva_b_overlap );
1260 
1261  fOverlap_integral = 0.0;
1262  for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
1263  Double_t bc_s = mva_s_overlap->GetBinContent(bin);
1264  Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1265  if (bc_s > 0.0 && bc_b > 0.0)
1266  fOverlap_integral += TMath::Min(bc_s, bc_b);
1267  }
1268 
1269  delete mva_s_overlap;
1270  delete mva_b_overlap;
1271  }
1272 
1273  delete mva_s;
1274  delete mva_b;
1275  delete fS;
1276  delete fB;
1277  if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;
1278 
1280 
1281  return ROC;
1282 }
1283 
1285 {
1286  // Calculate MVA values of current method fMethods.back() on
1287  // training sample
 // The responses are written into fMVAvalues, one entry per event index.
1288 
1290  MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
1291  if (!method) {
1292  Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
1293  return;
1294  }
1295  // calculate MVA values
1296  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
 // return value unused; presumably selects event ievt for the argument-less
 // GetMvaValue() below - confirm
1297  GetEvent(ievt);
 // at() is bounds-checked: fMVAvalues must already hold at least GetNEvents() entries
1298  fMVAvalues->at(ievt) = method->GetMvaValue();
1299  }
1300 
1301  // fill cumulative mva distribution
 // (no code follows the comment above in this function)
1302 
1303 
1304 }
1305 
1306 ////////////////////////////////////////////////////////////////////////////////
1307 /// fill various monitoring histograms from information of the individual classifiers that
1308 /// have been boosted.
1309 /// of course.... this depends very much on the individual classifiers, and so far, only for
1310 /// Decision Trees, this monitoring is actually implemented
1311 
1313 {
1315 
1316  if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
1317  TMVA::MethodDT* currentDT=dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
1318  if (currentDT){
1319  if (stage == Types::kBoostProcBegin){
1320  results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
1321  results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
1322  }
1323 
1324  if (stage == Types::kBeforeTraining){
1325  }
1326  else if (stage == Types::kBeforeBoosting){
1327  results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
1328  results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
1329  }
1330  else if (stage == Types::kAfterBoosting){
1331 
1332  }
1333  else if (stage != Types::kBoostProcEnd){
1334  Log() << kINFO << "<Train> average number of nodes before/after pruning : "
1335  << results->GetHist("NodesBeforePruning")->GetMean() << " / "
1336  << results->GetHist("NodesAfterPruning")->GetMean()
1337  << Endl;
1338  }
1339  }
1340 
1341  }else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
1342  if (stage == Types::kAfterBoosting){
1344  }
1345  }else{
1346  if (methodIndex < 3){
1347  Log() << kDEBUG << "No detailed boost monitoring for "
1348  << GetCurrentMethod(methodIndex)->GetMethodName()
1349  << " yet available " << Endl;
1350  }
1351  }
1352 
1353  //boosting plots universal for all classifiers 'typically for debug purposes only as they are not general enough'
1354 
1355  if (stage == Types::kBeforeBoosting){
1356  // if you want to display the weighted events for 2D case at each boost step:
1357  if (fDetailedMonitoring){
1358  // the following code is useful only for 2D examples - mainly illustration for debug/educational purposes:
1359  if (DataInfo().GetNVariables() == 2) {
1360  results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1361  results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
1362  results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1363  results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);
1364 
1366  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1367  const Event* ev = GetEvent(ievt);
1368  Float_t w = ev->GetWeight();
1369  Float_t v0= ev->GetValue(0);
1370  Float_t v1= ev->GetValue(1);
1371  // if (ievt<3) std::cout<<ievt<<" var0="<<v0<<" var1="<<v1<<" weight="<<w<<std::endl;
1372  TH2* h;
1373  if (DataInfo().IsSignal(ev)) h=results->GetHist2D(Form("EventDistSig_%d",methodIndex));
1374  else h=results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
1375  if (h) h->Fill(v0,v1,w);
1376  }
1377  }
1378  }
1379  }
1380 
1381  return;
1382 }
1383 
1384 
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:373
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3251
void SetMsgType(EMsgType t)
Definition: Configurable.h:125
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
static long int sum(long int i)
Definition: Factory.cxx:2258
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
float xmin
Definition: THbookFile.cxx:93
Random number generator class based on M.
Definition: TRandom3.h:27
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:191
THist< 1, int, THistStatContent > TH1I
Definition: THist.hxx:287
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
Definition: TRandom.cxx:435
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
auto * m
Definition: textangle.C:8
Double_t fROC_training
Definition: MethodBoost.h:185
void SingleTrain()
initialization
Stat_t GetSum() const
Definition: TArrayD.h:46
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:136
std::vector< TH1 *> fTestSigMVAHist
Definition: MethodBoost.h:176
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t Log(Double_t x)
Definition: TMath.h:759
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
float Float_t
Definition: RtypesCore.h:53
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file; dummy implementation here ...
static Types & Instance()
return the single instance of "Types" if it already exists, or create it (Singleton)
Definition: Types.cxx:70
Int_t GetBoostNum()
Definition: MethodBoost.h:88
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4374
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:285
Config & gConfig()
MsgLogger & Log() const
Definition: Configurable.h:122
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:157
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4770
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:127
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:94
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:356
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6930
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
Basic string class.
Definition: TString.h:131
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:567
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:385
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1100
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
bool Bool_t
Definition: RtypesCore.h:59
std::vector< TH1 *> fTrainBgdMVAHist
Definition: MethodBoost.h:171
const Ranking * CreateRanking()
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8445
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:369
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:859
void SetMethodDir(TDirectory *methodDir)
Definition: MethodBase.h:363
Double_t fOverlap_integral
Definition: MethodBoost.h:189
static void InhibitOutput()
Definition: MsgLogger.cxx:74
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
const Event * GetEvent() const
Definition: MethodBase.h:740
std::vector< Double_t > fMethodWeight
DataSet * Data() const
Definition: MethodBase.h:400
Virtual base class for combining several TMVA method.
virtual ~MethodBoost(void)
destructor
TString fWeightFileDir
Definition: Config.h:112
UInt_t GetClass() const
Definition: Event.h:81
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:38
Int_t GetNNodes()
Definition: MethodDT.h:97
IONames & GetIONames()
Definition: Config.h:90
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:411
DataSetInfo & DataInfo() const
Definition: MethodBase.h:401
Class that contains all the data information.
Definition: DataSetInfo.h:60
TFile * GetFile() const
Definition: MethodBase.h:361
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:408
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:134
virtual void Delete(Option_t *option="")
Delete this object.
Definition: TObject.cxx:169
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9298
TString fHistoricOption
Definition: MethodBoost.h:194
RooCmdArg Timer(Bool_t flag=kTRUE)
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
SVector< double, 2 > v
Definition: Dict.h:5
const char * GetName() const
Definition: MethodBase.h:325
ClassInfo * GetClassInfo(Int_t clNum) const
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:161
Implementation of the GiniIndex as separation criterion.
Definition: GiniIndex.h:63
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8514
TString fTransformString
Definition: MethodBoost.h:156
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
Ssiz_t Length() const
Definition: TString.h:405
void ScaleBoostWeight(Double_t s) const
Definition: Event.h:107
const TString & GetJobName() const
Definition: MethodBase.h:321
const TString & GetMethodName() const
Definition: MethodBase.h:322
TAxis * GetYaxis()
Definition: TH1.h:316
float xmax
Definition: THbookFile.cxx:93
Tools & gTools()
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:610
virtual TDirectory * GetDirectory(const char *apath, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory named "apath".
Bool_t IsSilentFile()
Definition: MethodBase.h:370
REAL epsilon
Definition: triangle.c:617
#define h(i)
Definition: RSha256.hxx:106
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:448
UInt_t GetNVariables() const
Definition: MethodBase.h:336
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
Class for categorizing the phase space.
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
int Ssiz_t
Definition: RtypesCore.h:63
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString fBoostedMethodOptions
Definition: MethodBoost.h:165
Double_t Exp(Double_t x)
Definition: TMath.h:726
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:438
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:167
#define ClassImp(name)
Definition: Rtypes.h:359
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
Definition: MethodBase.h:394
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:34
std::vector< TH1 *> fTrainSigMVAHist
Definition: MethodBoost.h:170
TString fBoostedMethodTitle
Definition: MethodBoost.h:164
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:136
int type
Definition: TGX11.cxx:120
void SetBoostWeight(Double_t w) const
Definition: Event.h:106
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:100
The TH1 histogram class.
Definition: TH1.h:56
Double_t fMethodError
Definition: MethodBoost.h:183
void AddPreDefVal(const T &)
Definition: Configurable.h:168
void GetHelpMessage() const
Get help message text.
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here ...
UInt_t GetNumber() const
Definition: ClassInfo.h:65
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:67
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:428
void ComputeStat(const std::vector< TMVA::Event *> &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:214
const TString & GetOptions() const
Definition: Configurable.h:84
virtual void TestClassification()
initialization
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1711
TString fBoostedMethodName
Definition: MethodBoost.h:163
#define REGISTER_METHOD(CLASS)
for example
std::vector< IMethod * > fMethods
Abstract ClassifierFactory template that handles arbitrary types.
Double_t fAdaBoostBeta
Definition: MethodBoost.h:159
Double_t GetMVAProbAt(Double_t value)
TH2 * GetHist2D(const TString &alias) const
Definition: Results.cxx:145
DataSetManager * fDataSetManager
Definition: MethodBoost.h:193
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:497
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Definition: MethodBase.cxx:601
Class that is the base-class for a vector of results.
Definition: Results.h:57
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:200
const Event * fTmpEvent
Definition: MethodBase.h:403
void SetWeightFileDir(TString fileDir)
set directory of weight file
Double_t GetOriginalWeight() const
Definition: Event.h:79
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:195
void InitHistos()
initialisation routine
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:351
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:284
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:217
Bool_t IsSignal(const Event *ev) const
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:190
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:428
A TTree object has a header with a name and a title.
Definition: TTree.h:70
TTree * fMonitorTree
Definition: MethodBoost.h:181
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:86
virtual Int_t GetNbinsX() const
Definition: TH1.h:291
std::vector< TH1 *> fBTrainSigMVAHist
Definition: MethodBoost.h:173
static void EnableOutput()
Definition: MsgLogger.cxx:75
Double_t fBoostWeight
Definition: MethodBoost.h:182
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:291
std::vector< TH1 *> fBTrainBgdMVAHist
Definition: MethodBoost.h:174
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
const Bool_t kTRUE
Definition: RtypesCore.h:87
Types::EMVA GetMethodType() const
Definition: MethodBase.h:324
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
virtual void TestClassification()
initialization
const Event * GetEvent() const
Definition: DataSet.cxx:202
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:427
std::vector< TH1 *> fTestBgdMVAHist
Definition: MethodBoost.h:178
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:315
Analysis of Boosted Decision Trees.
Definition: MethodDT.h:49
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:841
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:355
const char * Data() const
Definition: TString.h:364
Bool_t IsModelPersistence()
Definition: MethodBase.h:374