Logo ROOT   6.07/09
Reference Guide
MethodBoost.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodCompositeBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Virtual base class for all MVA method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * U. of Victoria, Canada *
24  * MPI-K Heidelberg, Germany *
25  * U. of Bonn, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  **********************************************************************************/
31 
32 //_______________________________________________________________________
33 //
34 // This class is meant to boost a single classifier. Boosting means //
35  * training the classifier a few times. Every time the weights of the //
36 // events are modified according to how well the classifier performed //
37 // on the test sample. //
38 ////////////////////////////////////////////////////////////////////////////////
39 
40 #include "TMVA/MethodBoost.h"
41 
42 #include "TMVA/ClassifierFactory.h"
43 #include "TMVA/Config.h"
44 #include "TMVA/Configurable.h"
45 #include "TMVA/DataSet.h"
46 #include "TMVA/DataSetInfo.h"
47 #include "TMVA/IMethod.h"
48 #include "TMVA/MethodBase.h"
49 #include "TMVA/MethodCategory.h"
51 #include "TMVA/MethodDT.h"
52 #include "TMVA/MethodFisher.h"
53 #include "TMVA/PDF.h"
54 #include "TMVA/Results.h"
55 #include "TMVA/Timer.h"
56 #include "TMVA/Tools.h"
57 #include "TMVA/Types.h"
58 
59 #include "TMVA/SeparationBase.h"
61 #include "TMVA/GiniIndex.h"
62 #include "TMVA/CrossEntropy.h"
65 
66 #include "Riostream.h"
67 #include "TRandom3.h"
68 #include "TFile.h"
69 #include "TMath.h"
70 #include "TObjString.h"
71 #include "TH1F.h"
72 #include "TH2F.h"
73 #include "TGraph.h"
74 #include "TSpline.h"
75 #include "TDirectory.h"
76 #include "TTree.h"
77 
78 #include <algorithm>
79 #include <iomanip>
80 #include <vector>
81 #include <cmath>
82 
83 
84 REGISTER_METHOD(Boost)
85 
86 ClassImp(TMVA::MethodBoost)
87 
88 ////////////////////////////////////////////////////////////////////////////////
89 
// Standard constructor used when booking a Boost method via the Factory.
// Forwards job name, title, dataset and option string to
// MethodCompositeBase with type Types::kBoost, and zero/kFALSE-initialises
// all bookkeeping members (boost counters, monitoring flags, histogram
// pointers and the per-event MVA-value cache).
90  TMVA::MethodBoost::MethodBoost( const TString& jobName,
91  const TString& methodTitle,
92  DataSetInfo& theData,
93  const TString& theOption ) :
94  TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption)
95  , fBoostNum(0)
96  , fDetailedMonitoring(kFALSE)
97  , fAdaBoostBeta(0)
98  , fRandomSeed(0)
99  , fBaggedSampleFraction(0)
100  , fBoostedMethodTitle(methodTitle)
101  , fBoostedMethodOptions(theOption)
102  , fMonitorBoostedMethod(kFALSE)
103  , fMonitorTree(0)
104  , fBoostWeight(0)
105  , fMethodError(0)
106  , fROC_training(0.0)
107  , fOverlap_integral(0.0)
108  , fMVAvalues(0)
109 {
// cache for the MVA response of the current classifier on the training
// sample; owned by this object and deleted in the destructor
110  fMVAvalues = new std::vector<Float_t>;
111 }
112 
113 ////////////////////////////////////////////////////////////////////////////////
114 
116  const TString& theWeightFile)
117  : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
118  , fBoostNum(0)
119  , fDetailedMonitoring(kFALSE)
120  , fAdaBoostBeta(0)
121  , fRandomSeed(0)
122  , fBaggedSampleFraction(0)
123  , fBoostedMethodTitle("")
124  , fBoostedMethodOptions("")
125  , fMonitorBoostedMethod(kFALSE)
126  , fMonitorTree(0)
127  , fBoostWeight(0)
128  , fMethodError(0)
129  , fROC_training(0.0)
130  , fOverlap_integral(0.0)
131  , fMVAvalues(0)
132 {
133  fMVAvalues = new std::vector<Float_t>;
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// destructor
138 
// Destructor body: clears the bookkeeping containers and releases the
// per-event MVA-value cache allocated in the constructors.
140 {
141  fMethodWeight.clear();
142 
143  // the histograms themselves are deleted when the ROOT file is closed;
// only the (non-owning) pointer containers are cleared here
144 
145  fTrainSigMVAHist.clear();
146  fTrainBgdMVAHist.clear();
147  fBTrainSigMVAHist.clear();
148  fBTrainBgdMVAHist.clear();
149  fTestSigMVAHist.clear();
150  fTestBgdMVAHist.clear();
151 
// fMVAvalues is owned by this object (allocated with new in the ctor)
152  if (fMVAvalues) {
153  delete fMVAvalues;
154  fMVAvalues = 0;
155  }
156 }
157 
158 
159 ////////////////////////////////////////////////////////////////////////////////
160 /// Boost can handle classification with 2 classes and regression with one regression-target
161 
// Only two-class classification is accepted; the regression branch is
// deliberately commented out (not supported by the boosting machinery).
163 {
164  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
165  // if (type == Types::kRegression && numberTargets == 1) return kTRUE;
166  return kFALSE;
167 }
168 
169 
170 ////////////////////////////////////////////////////////////////////////////////
171 
173 {
174  DeclareOptionRef( fBoostNum = 1, "Boost_Num",
175  "Number of times the classifier is boosted" );
176 
177  DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
178  "Write monitoring histograms for each boosted classifier" );
179 
180  DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
181  "Produce histograms for detailed boost monitoring" );
182 
183  DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
184  AddPreDefVal(TString("RealAdaBoost"));
185  AddPreDefVal(TString("AdaBoost"));
186  AddPreDefVal(TString("Bagging"));
187 
188  DeclareOptionRef(fBaggedSampleFraction=.6,"Boost_BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
189 
190  DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
191  "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
192 
193  DeclareOptionRef( fTransformString = "step", "Boost_Transform",
194  "Type of transform applied to every boosted method linear, log, step" );
195  AddPreDefVal(TString("step"));
196  AddPreDefVal(TString("linear"));
197  AddPreDefVal(TString("log"));
198  AddPreDefVal(TString("gauss"));
199 
200  DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
201  "Seed for random number generator used for bagging" );
202 
204 }
205 
206 ////////////////////////////////////////////////////////////////////////////////
207 /// options that are used ONLY for the READER to ensure backward compatibility
208 /// they are hence without any effect (the reader is only reading the training
209 /// options that HAD been used at the training of the .xml weightfile at hand
210 
212 {
213 
215 
216  DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
217  "How to set the final weight of the boosted classifiers" );
218  AddPreDefVal(TString("ByError"));
219  AddPreDefVal(TString("Average"));
220  AddPreDefVal(TString("ByROC"));
221  AddPreDefVal(TString("ByOverlap"));
222  AddPreDefVal(TString("LastMethod"));
223 
224  DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
225  "Type of transform applied to every boosted method linear, log, step" );
226  AddPreDefVal(TString("step"));
227  AddPreDefVal(TString("linear"));
228  AddPreDefVal(TString("log"));
229  AddPreDefVal(TString("gauss"));
230 
231  // this option here
232  //DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
233  // still exists, but these two possible values
234  AddPreDefVal(TString("HighEdgeGauss"));
235  AddPreDefVal(TString("HighEdgeCoPara"));
236  // have been deleted .. hope that works :)
237 
238  DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
239  "Recalculate the classifier MVA Signallike cut at every boost iteration" );
240 
241 }
242 ////////////////////////////////////////////////////////////////////////////////
243 /// just registering the string from which the boosted classifier will be created
244 
245 Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
246 {
// Only records title and option string of the classifier to be boosted;
// the actual method instances are created later, in Train().
// NOTE(review): this scraped listing is missing the line that derives
// fBoostedMethodName from theMethod -- confirm against the original source.
248  fBoostedMethodTitle = methodTitle;
249  fBoostedMethodOptions = theOption;
// lower-cased working copy, used by the (currently disabled) VarTransform
// compatibility check below
250  TString opts=theOption;
251  opts.ToLower();
252  // if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl;
253 
// booking itself cannot fail at this point
254  return kTRUE;
255 }
256 
257 ////////////////////////////////////////////////////////////////////////////////
258 
260 {
261 }
262 
263 ////////////////////////////////////////////////////////////////////////////////
264 /// initialisation routine
265 
267 {
268 
270 
271  results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
272  results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
273  results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
274  if (fDetailedMonitoring){
275  results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
276  results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
277  results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
278  results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
279  results->Store(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
280  }
281 
282 
283  results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
284  results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
285  results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
286  results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
287  results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
288  results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
289  if (fDetailedMonitoring){
290  results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
291  results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
292  results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
293  results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
294  results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
295  results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
296  results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
297  results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
298  results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
299  results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
300  }
301 
302  results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
303  results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
304  results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");
305 
306  results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
307  results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
308  results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");
309 
310 
311 
312  fMonitorTree= new TTree("MonitorBoost","Boost variables");
313  fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
314  fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
315  fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
317 
318 }
319 
320 
321 ////////////////////////////////////////////////////////////////////////////////
322 
// Diagnostic helper: dumps the current boost configuration and the sizes
// of the internal bookkeeping containers to the logger at DEBUG level.
// No state is modified here -- it only reads members and logs them.
324 {
325  Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType << Endl;
326  Log() << kDEBUG << "CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<Endl;
327  Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
328  Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
329  Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum << Endl;
330  Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
331  Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
332  Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
333  Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
334  Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
335  Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
336  Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
337  Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx << Endl;
338  if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
339  Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
340  if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
341  Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
342 
343 }
344 ////////////////////////////////////////////////////////////////////////////////
345 
347 {
348  TDirectory* methodDir( 0 );
349  TString dirName,dirTitle;
350  Int_t StopCounter=0;
352 
353 
354  InitHistos();
355 
356  if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
358 
359  if (fMethods.size() > 0) fMethods.clear();
360  fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
361 
362  Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
364 
366 
367  // clean boosted method options
369 
370 
371  // remove transformations for individual boosting steps
372  // the transformation of the main method will be rerouted to each of the boost steps
373  Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform=");
374  if (varTrafoStart >0) {
375  Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart);
376  if (varTrafoEnd<varTrafoStart)
377  varTrafoEnd=fBoostedMethodOptions.Length();
378  fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
379  }
380 
381  //
382  // training and boosting the classifiers
384  // the first classifier shows the option string output, the rest not
386 
388  GetJobName(),
390  DataInfo(),
393 
394  // supressing the rest of the classifier output the right way
395  fCurrentMethod = (dynamic_cast<MethodBase*>(method));
396 
397  if (fCurrentMethod==0) {
398  Log() << kFATAL << "uups.. guess the booking of the " << fCurrentMethodIdx << "-th classifier somehow failed" << Endl;
399  return; // hope that makes coverity happy (as if fears I migh use the pointer later on, not knowing that FATAL exits
400  }
401 
402  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
403  if (fCurrentMethod->GetMethodType() == Types::kCategory) { // DSMTEST
404  MethodCategory *methCat = (dynamic_cast<MethodCategory*>(fCurrentMethod)); // DSMTEST
405  if (!methCat) // DSMTEST
406  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl; // DSMTEST
407  methCat->fDataSetManager = fDataSetManager; // DSMTEST
408  } // DSMTEST
409 
413  // put SetAnalysisType here for the needs of MLP
417 
418 
419  // reroute transformationhandler
421 
422 
423  // creating the directory of the classifier
424  if(!IsSilentFile())
425  {
426  if (fMonitorBoostedMethod) {
427  methodDir=GetFile()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
428  if (methodDir==0) {
429  methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
430  }
431  fCurrentMethod->SetMethodDir(methodDir);
432  fCurrentMethod->BaseDir()->cd();
433  }
434  }
435 
436  // training
437  TMVA::MethodCompositeBase::fMethods.push_back(method);
441  TMVA::MsgLogger::InhibitOutput(); //supressing Logger outside the method
442  if (fBoostType=="Bagging") Bagging(); // you want also to train the first classifier on a bagged sample
443  SingleTrain();
446 
447  // calculate MVA values of current method for all events in training sample
448  // (used later on to get 'misclassified events' etc for the boosting
449  CalcMVAValues();
450 
452 
453  // get ROC integral and overlap integral for single method on
454  // training sample if fMethodWeightType == "ByROC" or the user
455  // wants detailed monitoring
456 
457  // boosting (reweight training sample)
460 
462  results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1,fBoostWeight);
463  results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1,fMethodError);
464 
465  if (fDetailedMonitoring) {
468  results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
469  results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
470  results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
471  results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
472  }
473 
474 
475 
476  fMonitorTree->Fill();
477 
478  // stop boosting if needed when error has reached 0.5
479  // thought of counting a few steps, but it doesn't seem to be necessary
480  Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
481  if (fMethodError > 0.49999) StopCounter++;
482  if (StopCounter > 0 && fBoostType != "Bagging") {
483  timer.DrawProgressBar( fBoostNum );
484  fBoostNum = fCurrentMethodIdx+1;
485  Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
486  if (fBoostNum < 5)
487  Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fAdaBoostBeta << ", try reducing it." <<Endl;
488  break;
489  }
490  }
491 
492  //as MethodBoost acts not on a private event sample (like MethodBDT does), we need to remember not
493  // to leave "boosted" events to the next classifier in the factory
494 
496 
497  Timer* timer1= new Timer( fBoostNum, GetName() );
498  // normalizing the weights of the classifiers
500  // pefroming post-boosting actions
501 
503 
504  if (fCurrentMethodIdx==fBoostNum) {
505  Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
506  << " " << Endl;
507  }
508 
509  TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
511 
512  }
513 
514  // Ensure that in case of only 1 boost the method weight equals
515  // 1.0. This avoids unexpected behaviour in case of very bad
516  // classifiers which have fBoostWeight=1 or fMethodError=0.5,
517  // because their weight would be set to zero. This behaviour is
518  // not ok if one boosts just one time.
519  if (fMethods.size()==1) fMethodWeight[0] = 1.0;
520 
522 
523  delete timer1;
524 }
525 
526 ////////////////////////////////////////////////////////////////////////////////
527 
529 {
531 }
532 
533 ////////////////////////////////////////////////////////////////////////////////
534 
536 {
537  if (fBoostNum <=0) Log() << kFATAL << "CreateHistorgrams called before fBoostNum is initialized" << Endl;
538  // calculating histograms boundries and creating histograms..
539  // nrms = number of rms around the average to use for outline (of the 0 classifier)
540  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
541  Int_t signalClass = 0;
542  if (DataInfo().GetClassInfo("Signal") != 0) {
543  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
544  }
546  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
547 
549  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
550  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;
551 
552  // creating all the historgrams
553  for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
554  fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
555  fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
556  fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
557  fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
558  fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i", imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
559  fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i", imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
560  }
561 }
562 
563 ////////////////////////////////////////////////////////////////////////////////
564 /// resetting back the boosted weights of the events to 1
565 
// Restores every training event's boost weight to 1.0 so that the
// reweighting applied during boosting does not leak into subsequent
// classifiers booked in the same factory.
// NOTE(review): SetBoostWeight is called through a const Event* --
// presumably the boost weight is a mutable member; confirm in Event.h.
567 {
568  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
569  const Event *ev = Data()->GetEvent(ievt);
570  ev->SetBoostWeight( 1.0 );
571  }
572 }
573 
574 ////////////////////////////////////////////////////////////////////////////////
575 
// Writes the per-classifier training monitoring histograms (signal and
// background, plain and boosted-weight variants) into each boosted
// classifier's own directory, then writes the monitoring TTree in the
// base directory.
// NOTE(review): the loop runs over fBoostNum but indexes fMethods[imtd];
// this assumes fBoostNum never exceeds fMethods.size() -- confirm against
// the early-stop logic in Train().
577 {
578  TDirectory* dir=0;
579  if (fMonitorBoostedMethod) {
580  for (UInt_t imtd=0;imtd<fBoostNum;imtd++) {
581 
582  //writing the histograms in the specific classifier's directory
583  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
584  if (!m) continue;
585  dir = m->BaseDir();
586  dir->cd();
587  fTrainSigMVAHist[imtd]->SetDirectory(dir);
588  fTrainSigMVAHist[imtd]->Write();
589  fTrainBgdMVAHist[imtd]->SetDirectory(dir);
590  fTrainBgdMVAHist[imtd]->Write();
591  fBTrainSigMVAHist[imtd]->SetDirectory(dir);
592  fBTrainSigMVAHist[imtd]->Write();
593  fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
594  fBTrainBgdMVAHist[imtd]->Write();
595  }
596  }
597 
598  // going back to the original folder
599  BaseDir()->cd();
600 
601  fMonitorTree->Write();
602 }
603 
604 ////////////////////////////////////////////////////////////////////////////////
605 
607 {
609  if (fMonitorBoostedMethod) {
610  UInt_t nloop = fTestSigMVAHist.size();
611  if (fMethods.size()<nloop) nloop = fMethods.size();
612  //running over all the events and populating the test MVA histograms
614  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
615  const Event* ev = GetEvent(ievt);
616  Float_t w = ev->GetWeight();
617  if (DataInfo().IsSignal(ev)) {
618  for (UInt_t imtd=0; imtd<nloop; imtd++) {
619  fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
620  }
621  }
622  else {
623  for (UInt_t imtd=0; imtd<nloop; imtd++) {
624  fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
625  }
626  }
627  }
629  }
630 }
631 
632 ////////////////////////////////////////////////////////////////////////////////
633 
635 {
637  if (treetype==Types::kTraining) return;
638  UInt_t nloop = fTestSigMVAHist.size();
639  if (fMethods.size()<nloop) nloop = fMethods.size();
640  if (fMonitorBoostedMethod) {
641  TDirectory* dir=0;
642  for (UInt_t imtd=0;imtd<nloop;imtd++) {
643  //writing the histograms in the specific classifier's directory
644  MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
645  if (!mva) continue;
646  dir = mva->BaseDir();
647  if (dir==0) continue;
648  dir->cd();
649  fTestSigMVAHist[imtd]->SetDirectory(dir);
650  fTestSigMVAHist[imtd]->Write();
651  fTestBgdMVAHist[imtd]->SetDirectory(dir);
652  fTestBgdMVAHist[imtd]->Write();
653  }
654  }
655 }
656 
657 ////////////////////////////////////////////////////////////////////////////////
658 /// process user options
659 
661 {
662 }
663 
664 ////////////////////////////////////////////////////////////////////////////////
665 /// initialization
666 
// Trains the most recently booked classifier: propagates the silent-file
// flag, sets its weight-file directory when model persistence is enabled,
// and invokes its TrainMethod().
// NOTE(review): this scraped listing is missing two interior lines
// (embedded numbers 669 and 678) -- compare with the original source.
668 {
670  MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
671  if (meth){
672  meth->SetSilentFile(IsSilentFile());
673  if(IsModelPersistence()){
// weight files go under "<dataset name>/<configured weight-file dir>"
674  TString _fFileDir= DataInfo().GetName();
675  _fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
676  meth->SetWeightFileDir(_fFileDir);
677  }
679  meth->TrainMethod();
680  }
681 }
682 
683 ////////////////////////////////////////////////////////////////////////////////
684 /// find the CUT on the individual MVA that defines an event as
685 /// correct or misclassified (to be used in the boosting process)
686 
688 {
689  if (!method || method->GetMethodType() == Types::kDT ){ return;}
690 
691  // creating a fine histograms containing the error rate
692  const Int_t nBins=10001;
693  Double_t minMVA=150000;
694  Double_t maxMVA=-150000;
695  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
696  GetEvent(ievt);
697  Double_t val=method->GetMvaValue();
698  //Helge .. I think one could very well use fMVAValues for that ... -->to do
699  if (val>maxMVA) maxMVA=val;
700  if (val<minMVA) minMVA=val;
701  }
702  maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
703 
704  Double_t sum = 0.;
705 
706  TH1D *mvaS = new TH1D(Form("MVAS_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
707  TH1D *mvaB = new TH1D(Form("MVAB_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
708  TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
709  TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
710 
711 
713  if (fDetailedMonitoring){
714  results->Store(mvaS, Form("MVAS_%d",fCurrentMethodIdx));
715  results->Store(mvaB, Form("MVAB_%d",fCurrentMethodIdx));
716  results->Store(mvaSC,Form("MVASC_%d",fCurrentMethodIdx));
717  results->Store(mvaBC,Form("MVABC_%d",fCurrentMethodIdx));
718  }
719 
720  for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
721 
722  Double_t weight = GetEvent(ievt)->GetWeight();
723  Double_t mvaVal=method->GetMvaValue();
724  sum +=weight;
725  if (DataInfo().IsSignal(GetEvent(ievt))){
726  mvaS->Fill(mvaVal,weight);
727  }else {
728  mvaB->Fill(mvaVal,weight);
729  }
730  }
731  SeparationBase *sepGain;
732 
733 
734  // Boosting should use Miscalssification not Gini Index (changed, Helge 31.5.2013)
735  // ACHTUNG !! mit "Misclassification" geht es NUR wenn man die Signal zu Background bei jedem Boost schritt
736  // wieder hinbiegt. Es gibt aber komischerweise bessere Ergebnisse (genau wie bei BDT auch schon beobachtet) wenn
737  // man GiniIndex benutzt und akzeptiert dass jedes andere mal KEIN vernuenftiger Cut gefunden wird - d.h. der
738  // Cut liegt dann ausserhalb der MVA value range, alle events sind als Bkg classifiziert und dann wird entpsrehcend
739  // des Boost algorithmus 'automitisch' etwas renormiert .. sodass im naechsten Schritt dann wieder was vernuenftiges
740  // rauskommt. Komisch .. dass DAS richtig sein soll ??
741 
742  // SeparationBase *sepGain2 = new MisClassificationError();
743  //sepGain = new MisClassificationError();
744  sepGain = new GiniIndex();
745  //sepGain = new CrossEntropy();
746 
747  Double_t sTot = mvaS->GetSum();
748  Double_t bTot = mvaB->GetSum();
749 
750  mvaSC->SetBinContent(1,mvaS->GetBinContent(1));
751  mvaBC->SetBinContent(1,mvaB->GetBinContent(1));
752  Double_t sSel=0;
753  Double_t bSel=0;
754  Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
755  Double_t mvaCut=mvaSC->GetBinLowEdge(1);
756  Double_t sSelCut=sSel;
757  Double_t bSelCut=bSel;
758  // std::cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << std::endl;
759 
760  // for (Int_t ibin=1;ibin<=nBins;ibin++) std::cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << std::endl;
761  Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal
762  for (Int_t ibin=1;ibin<=nBins;ibin++){
763  mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
764  mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));
765 
766  sSel=mvaSC->GetBinContent(ibin);
767  bSel=mvaBC->GetBinContent(ibin);
768 
769  // if (ibin==nBins){
770  // std::cout << "Last bin s="<< sSel <<" b="<<bSel << " s="<< sTot-sSel <<" b="<<bTot-bSel << endl;
771  // }
772 
773  if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
774  // && (mvaSC->GetBinCenter(ibin) >0 || (fCurrentMethodIdx+1)%2 )
775  ){
776  separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
777  // mvaCut=mvaSC->GetBinCenter(ibin);
778  mvaCut=mvaSC->GetBinLowEdge(ibin+1);
779  // if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1;
780  if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
781  else mvaCutOrientation=1;
782  sSelCut=sSel;
783  bSelCut=bSel;
784  // std::cout << "new cut at " << mvaCut << "with s="<<sTot-sSel << " b="<<bTot-bSel << std::endl;
785  }
786  /*
787  Double_t ori;
788  if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) ori=-1;
789  else ori=1;
790  std::cout << ibin << " mvacut="<<mvaCut
791  << " sTot=" << sTot
792  << " bTot=" << bTot
793  << " sSel=" << sSel
794  << " bSel=" << bSel
795  << " s/b(1)=" << sSel/bSel
796  << " s/b(2)=" << (sTot-sSel)/(bTot-bSel)
797  << " sepGain="<<sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
798  << " sepGain2="<<sepGain2->GetSeparationGain(sSel,bSel,sTot,bTot)
799  << " " <<ori
800  << std::endl;
801  */
802 
803  }
804 
805  if (0){
806  double parentIndex=sepGain->GetSeparationIndex(sTot,bTot);
807  double leftIndex =sepGain->GetSeparationIndex(sSelCut,bSelCut);
808  double rightIndex =sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
809  std::cout
810  << " sTot=" << sTot
811  << " bTot=" << bTot
812  << " s="<<sSelCut
813  << " b="<<bSelCut
814  << " s2="<<(sTot-sSelCut)
815  << " b2="<<(bTot-bSelCut)
816  << " s/b(1)=" << sSelCut/bSelCut
817  << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
818  << " index before cut=" << parentIndex
819  << " after: left=" << leftIndex
820  << " after: right=" << rightIndex
821  << " sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
822  << " sepGain="<<separationGain
823  << " sepGain="<<sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
824  << " cut=" << mvaCut
825  << " idx="<<fCurrentMethodIdx
826  << " cutOrientation="<<mvaCutOrientation
827  << std::endl;
828  }
829  method->SetSignalReferenceCut(mvaCut);
830  method->SetSignalReferenceCutOrientation(mvaCutOrientation);
831 
832  results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1,separationGain);
833 
834 
835  Log() << kDEBUG << "(old step) Setting method cut to " <<method->GetSignalReferenceCut()<< Endl;
836 
837  if(IsSilentFile())
838  {
839  mvaS ->Delete();
840  mvaB ->Delete();
841  mvaSC->Delete();
842  mvaBC->Delete();
843  }
844 }
845 
846 ////////////////////////////////////////////////////////////////////////////////
847 
// Dispatch one boosting step for the given classifier according to fBoostType.
// NOTE(review): the Doxygen extraction dropped the signature line (orig. 848);
// per the class index this is Double_t MethodBoost::SingleBoost(MethodBase* method).
// Returns the boost weight computed by the chosen algorithm and also appends it
// to fMethodWeight (the per-method weight vector used when combining responses).
849 {
850  Double_t returnVal=-1;
851 
852 
 // "AdaBoost" -> discrete AdaBoost (second arg 1), "RealAdaBoost" -> real AdaBoost (0),
 // "Bagging" -> Poisson resampling; any other string is a configuration error (kFATAL aborts).
853  if (fBoostType=="AdaBoost") returnVal = this->AdaBoost (method,1);
854  else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost (method,0);
855  else if (fBoostType=="Bagging") returnVal = this->Bagging ();
856  else{
857  Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
858  }
 // Record this step's weight; note a kFATAL above would normally terminate, but if
 // logging is non-fatal the sentinel -1 would be pushed here.
859  fMethodWeight.push_back(returnVal);
860  return returnVal;
861 }
862 ////////////////////////////////////////////////////////////////////////////////
863 /// the standard (discrete or real) AdaBoost algorithm
864 
// The standard (discrete or real) AdaBoost algorithm: measures the weighted error
// of the just-trained classifier on the training sample, derives a boost weight
// from it, and rescales the per-event boost weights so misclassified events count
// more in the next training round.
// NOTE(review): the extraction dropped the signature line (orig. 865); per the class
// index it is Double_t AdaBoost(MethodBase* method, Bool_t useYesNoLeaf) — the body
// refers to the Bool_t parameter as `discreteAdaBoost` (kTRUE = discrete AdaBoost
// using the yes/no cut, kFALSE = RealAdaBoost using a signal-probability estimate).
866 {
867  if (!method) {
868  Log() << kWARNING << " AdaBoost called without classifier reference - needed for calulating AdaBoost " << Endl;
869  return 0;
870  }
871 
 // v is filled below but only consumed by monitoring-histogram fills that were
 // lost in the extraction gap (orig. 907-913) — see NOTE(review) there.
872  Float_t w,v; Bool_t sig=kTRUE;
873  Double_t sumAll=0, sumWrong=0;
 // Per-event misclassification flags; freed via delete[] at the end.
874  Bool_t* WrongDetection=new Bool_t[GetNEvents()];
875  QuickMVAProbEstimator *MVAProb=NULL;
876 
877  if (discreteAdaBoost) {
 // Discrete mode: define the signal/background decision cut on the MVA output.
878  FindMVACut(method);
879  Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
880  } else {
881  MVAProb=new TMVA::QuickMVAProbEstimator();
882  // the RealAdaBoost does use a simple "yes (signal)" or "no (background)"
883  // answer from your single MVA, but a "signal probability" instead (in the BDT case,
884  // that would be the 'purity' in the leaf node. For some MLP parameter, the MVA output
885  // can also interpreted as a probability, but here I try a genera aproach to get this
886  // probability from the MVA distributions...
887 
 // Book every training event (MVA value, weight, class) into the probability estimator.
888  for (Long64_t evt=0; evt<GetNEvents(); evt++) {
889  const Event* ev = Data()->GetEvent(evt);
890  MVAProb->AddEvent(fMVAvalues->at(evt),ev->GetWeight(),ev->GetClass());
891  }
892  }
893 
894 
895  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;
896 
897  // finding the wrong events and calculating their total weights
898  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
899  const Event* ev = GetEvent(ievt);
900  sig=DataInfo().IsSignal(ev);
901  v = fMVAvalues->at(ievt);
902  w = ev->GetWeight();
903  sumAll += w;
904  if(!IsSilentFile())
905  {
906  if (fMonitorBoostedMethod) {
 // NOTE(review): extraction gap — the statements that presumably filled the
 // per-method signal/background monitoring histograms (orig. 909, 912-913)
 // are missing here; confirm against the original MethodBoost.cxx.
908  if (sig) {
910  }
911  else {
914  }
915  }
916  }
917 
918  if (discreteAdaBoost){
 // Discrete: an event is "wrong" when the cut-based yes/no answer disagrees
 // with its true class; accumulate the weight of wrong events.
919  if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
920  WrongDetection[ievt]=kFALSE;
921  }else{
922  WrongDetection[ievt]=kTRUE;
923  sumWrong+=w;
924  }
925  }else{
 // Real AdaBoost: map the probability into [-1,1] and accumulate the signed,
 // weighted agreement with the true class label (+1 signal / -1 background).
926  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
927  mvaProb = 2*(mvaProb-0.5);
928  Int_t trueType;
929  if (DataInfo().IsSignal(ev)) trueType = 1;
930  else trueType = -1;
931  sumWrong+= w*trueType*mvaProb;
932  }
933  }
934 
 // Weighted misclassification fraction (discrete) / signed overlap (real).
935  fMethodError=sumWrong/sumAll;
936 
937  // calculating the fMethodError and the boostWeight out of it uses the formula
938  // w = ((1-err)/err)^beta
939 
940  Double_t boostWeight=0;
941 
942  if (fMethodError == 0) { //no misclassification made.. perfect, no boost ;)
943  Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
944  }else{
945 
 // NOTE(review): extraction gap — the discrete-AdaBoost assignment (orig. 947),
 // presumably boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta
 // per the formula comment above, is missing; confirm against the original file.
946  if (discreteAdaBoost)
948  else
949  boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
950 
951 
952  // std::cout << "boostweight = " << boostWeight << std::endl;
953 
954  // ADA boosting, rescaling the weight of the wrong events according to the error level
955  // over the entire test sample rescaling all the weights to have the same sum, but without
956  // touching the original weights (changing only the boosted weight of all the events)
957  // first reweight
958 
959  Double_t newSum=0., oldSum=0.;
960 
961 
962  Double_t boostfactor = TMath::Exp(boostWeight);
963 
964 
965  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
966  const Event* ev = Data()->GetEvent(ievt);
967  oldSum += ev->GetWeight();
968  if (discreteAdaBoost){
969  // events are classified as Signal OR background .. right or wrong
 // Negative-weight events are scaled by the inverse factor to keep their
 // relative influence consistent.
970  if (WrongDetection[ievt] && boostWeight != 0) {
971  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
972  else ev->ScaleBoostWeight(1./boostfactor);
973  }
974  // if (ievt<30) std::cout<<ievt<<" var0="<<ev->GetValue(0)<<" var1="<<ev->GetValue(1)<<" weight="<<ev->GetWeight() << " boostby:"<<boostfactor<<std::endl;
975 
976  }else{
977  // events are classified by their probability of being signal or background
978  // (eventually you should write this one - i.e. re-use the MVA value that were already
979  // calcualted and stroed.. however ,for the moement ..
980  Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
981  mvaProb = 2*(mvaProb-0.5);
982  // mvaProb = (1-mvaProb);
983 
984  Int_t trueType=1;
985  if (DataInfo().IsSignal(ev)) trueType = 1;
986  else trueType = -1;
987 
 // Continuous boost factor: exp(-alpha * y * p), large when prediction and
 // truth disagree (y*p < 0), i.e. misclassified events gain weight.
988  boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
989  if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
990  else ev->ScaleBoostWeight(1./boostfactor);
991 
992  }
993  newSum += ev->GetWeight();
994  }
995 
 // Renormalize so the total event weight is unchanged by the boosting step.
996  Double_t normWeight = oldSum/newSum;
997  // next normalize the weights
998  Double_t normSig=0, normBkg=0;
999  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1000  const Event* ev = Data()->GetEvent(ievt);
1001  ev->ScaleBoostWeight(normWeight);
1002  if (ev->GetClass()) normSig+=ev->GetWeight();
1003  else normBkg+=ev->GetWeight();
1004  }
1005 
 // NOTE(review): extraction gap — the line declaring/fetching `results`
 // (orig. 1006, presumably via Data()->GetResults(...)) is missing here;
 // `results` is used below but never declared in the visible lines.
1007  results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);
1008 
 // Final rescale: signal and background classes each end up carrying half of
 // the original total weight (oldSum/2 per class).
1009  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1010  const Event* ev = Data()->GetEvent(ievt);
1011 
1012  if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
1013  else ev->ScaleBoostWeight(oldSum/normBkg/2);
1014  }
1015  }
1016 
1017  delete[] WrongDetection;
1018  if (MVAProb) delete MVAProb;
1019 
1020  fBoostWeight = boostWeight; // used ONLY for the monitoring tree
1021 
1022  return boostWeight;
1023 }
1024 
1025 
1026 ////////////////////////////////////////////////////////////////////////////////
1027 /// Bagging or Bootstrap boosting, gives new random poisson weight for every event
1028 
// Bagging / bootstrap boosting: draws a new random Poisson boost weight for every
// event, so each boosting round effectively trains on a resampled dataset.
// NOTE(review): the extraction dropped the signature line (orig. 1029,
// Double_t MethodBoost::Bagging() per the class index) and the loop body line
// (orig. 1034) — presumably ev->SetBoostWeight(trandom->PoissonD(fBaggedSampleFraction))
// given the index entries for SetBoostWeight/PoissonD/fBaggedSampleFraction; confirm.
// NOTE(review): `trandom` is allocated with new but never deleted in the visible
// lines — this leaks one TRandom3 per boosting round.
1030 {
 // Seed depends on the round (fMethods.size()) so each round resamples differently.
1031  TRandom3 *trandom = new TRandom3(fRandomSeed+fMethods.size());
1032  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1033  const Event* ev = Data()->GetEvent(ievt);
1035  }
1036  fBoostWeight = 1; // used ONLY for the monitoring tree
 // Bagged classifiers are combined with equal weight.
1037  return 1.;
1038 }
1039 
1040 
1041 ////////////////////////////////////////////////////////////////////////////////
1042 /// Get help message text
1043 ///
1044 /// typical length of text line:
1045 /// "|--------------------------------------------------------------|"
1046 
// Print the user help text for MethodBoost (short description, tuning hints).
// NOTE(review): the extraction dropped the signature line (orig. 1047,
// void MethodBoost::GetHelpMessage() const per the class index).
// NOTE(review): the emitted text at orig. 1075 says "booing of the method" —
// presumably a typo for "booking"; left untouched here since it is runtime output.
1048 {
1049  Log() << Endl;
1050  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1051  Log() << Endl;
1052  Log() << "This method combines several classifier of one species in a "<<Endl;
1053  Log() << "single multivariate quantity via the boost algorithm." << Endl;
1054  Log() << "the output is a weighted sum over all individual classifiers" <<Endl;
1055  Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
1056  Log() << "events that were misclassified in the previous tree a larger " << Endl;
1057  Log() << "weight in the training of the following classifier."<<Endl;
1058  Log() << "Optionally, Bagged boosting can also be applied." << Endl;
1059  Log() << Endl;
1060  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1061  Log() << Endl;
1062  Log() << "The most important parameter in the configuration is the "<<Endl;
1063  Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
1064  Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
1065  Log() << "AdaBoosting: The most important parameters in this configuration" <<Endl;
1066  Log() << "is the beta parameter (Boost_AdaBoostBeta) " << Endl;
1067  Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
1068  Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
1069  Log() << "are available: step, log, and minmax, the default is no transform."<<Endl;
1070  Log() <<Endl;
1071  Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
1072  Log() << "their performance by boosting them, some even slightly deteriorate"<< Endl;
1073  Log() << "due to the boosting." <<Endl;
1074  Log() << "The booking of the boost method is special since it requires"<<Endl;
1075  Log() << "the booing of the method to be boosted and the boost itself."<<Endl;
1076  Log() << "This is solved by booking the method to be boosted and to add"<<Endl;
1077  Log() << "all Boost parameters, which all begin with \"Boost_\" to the"<<Endl;
1078  Log() << "options string. The factory separates the options and initiates"<<Endl;
1079  Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
1080  Log() << "macro \"Boost.C\"" <<Endl;
1081 }
1082 
1083 ////////////////////////////////////////////////////////////////////////////////
1084 
// NOTE(review): the extraction dropped the signature line (orig. 1085); per the
// class index this is const Ranking* MethodBoost::CreateRanking() — a boosted
// classifier provides no variable ranking, hence the null return.
1086 {
1087  return 0;
1088 }
1089 
1090 ////////////////////////////////////////////////////////////////////////////////
1091 /// return boosted MVA response
1092 
// Return the boosted MVA response: the fMethodWeight-weighted average of the
// (optionally transformed) responses of all booked sub-methods.
// NOTE(review): the extraction dropped the signature line (orig. 1093); by the
// call to NoErrorCalc(err, errUpper) this is the usual
// Double_t GetMvaValue(Double_t* err, Double_t* errUpper) override.
1094 {
1095  Double_t mvaValue = 0;
1096  Double_t norm = 0;
 // Offset used by the "log" transform so the argument of Log stays positive.
1097  Double_t epsilon = TMath::Exp(-1.);
1098  //Double_t fact = TMath::Exp(-1.)+TMath::Exp(1.);
1099  for (UInt_t i=0;i< fMethods.size(); i++){
1100  MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
 // Skip entries that are not MethodBase (defensive; see NOTE below on norm).
1101  if (m==0) continue;
1102  Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
1103  Double_t sigcut = m->GetSignalReferenceCut();
1104 
1105  // default is no transform
1106  if (fTransformString == "linear"){
1107 
1108  }
1109  else if (fTransformString == "log"){
 // Clamp below the cut so (val-sigcut) >= 0 before taking the log.
1110  if (val < sigcut) val = sigcut;
1111 
1112  val = TMath::Log((val-sigcut)+epsilon);
1113  }
1114  else if (fTransformString == "step" ){
 // Hard +-1 decision per sub-method (discrete AdaBoost style).
1115  if (m->IsSignalLike(val)) val = 1.;
1116  else val = -1.;
1117  }
1118  else if (fTransformString == "gauss"){
1119  val = TMath::Gaus((val-sigcut),1);
1120  }
1121  else {
1122  Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
1123  }
1124  mvaValue+=val*fMethodWeight[i];
1125  norm +=fMethodWeight[i];
1126  // std::cout << "mva("<<i<<") = "<<val<<" " << valx<< " " << mvaValue<<" and sigcut="<<sigcut << std::endl;
1127  }
 // NOTE(review): if every dynamic_cast above failed (or fMethods is empty),
 // norm is 0 and this divides by zero — presumably cannot happen in practice,
 // but worth confirming.
1128  mvaValue/=norm;
1129  // cannot determine error
1130  NoErrorCalc(err, errUpper);
1131 
1132  return mvaValue;
1133 }
1134 
1135 ////////////////////////////////////////////////////////////////////////////////
1136 /// Calculate the ROC integral of a single classifier or even the
1137 /// whole boosted classifier. The tree type (training or testing
1138 /// sample) is specified by 'eTT'.
1139 ///
1140 /// If tree type kTraining is set, the original training sample is
1141 /// used to compute the ROC integral (original weights).
1142 ///
1143 /// - singleMethod - if kTRUE, return ROC integral of single (last
1144 /// trained) classifier; if kFALSE, return ROC
1145 /// integral of full classifier
1146 ///
1147 /// - eTT - tree type (Types::kTraining / Types::kTesting)
1148 ///
1149 /// - CalcOverlapIntergral - if kTRUE, the overlap integral of the
1150 /// signal/background MVA distributions
1151 /// is calculated and stored in
1152 /// 'fOverlap_integral'
1153 
// Compute the ROC integral of either the last-trained single classifier or the
// full boosted classifier on the training or testing sample; optionally also
// compute the signal/background overlap integral into fOverlap_integral.
// NOTE(review): the extraction dropped the signature line (orig. 1154); per the
// class index it is
//   Double_t GetBoostROCIntegral(Bool_t singleMethod, Types::ETreeType eTT,
//                                Bool_t CalcOverlapIntergral = kFALSE)
// Extraction gaps also exist at orig. 1213 and 1270 (the latter presumably
// restores the data set to Types::kTraining before returning — confirm).
1155 {
1156  // set data sample training / testing
1157  Data()->SetCurrentType(eTT);
1158 
1159  MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no prtection against a zero-pointer delivered by dynamic_cast
1160  // to make CoVerity happy (although, OF COURSE, the last method in the commitee
1161  // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
1162  // will never by "zero" ...
1163  if (singleMethod && !method) {
1164  Log() << kFATAL << " What do you do? Your method:"
1165  << fMethods.back()->GetName()
1166  << " seems not to be a propper TMVA method"
1167  << Endl;
1168  std::exit(1);
1169  }
1170  Double_t err = 0.0;
1171 
1172  // temporary renormalize the method weights in case of evaluation
1173  // of full classifier.
1174  // save the old normalization of the methods
1175  std::vector<Double_t> OldMethodWeight(fMethodWeight);
1176  if (!singleMethod) {
1177  // calculate sum of weights of all methods
1178  Double_t AllMethodsWeight = 0;
1179  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1180  AllMethodsWeight += fMethodWeight.at(i);
1181  // normalize the weights of the classifiers
1182  if (AllMethodsWeight != 0.0) {
1183  for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1184  fMethodWeight[i] /= AllMethodsWeight;
1185  }
1186  }
1187 
1188  // calculate MVA values
1189  Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
1190  std::vector <Float_t>* mvaRes;
 // Reuse cached values when evaluating the single method on the training sample;
 // otherwise recompute (new vector is deleted at the end — see orig. 1268).
1191  if (singleMethod && eTT==Types::kTraining)
1192  mvaRes = fMVAvalues; // values already calculated
1193  else {
1194  mvaRes = new std::vector <Float_t>(GetNEvents());
1195  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1196  GetEvent(ievt);
1197  (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
1198  }
1199  }
1200 
1201  // restore the method weights
1202  if (!singleMethod)
1203  fMethodWeight = OldMethodWeight;
1204 
1205  // now create histograms for calculation of the ROC integral
1206  Int_t signalClass = 0;
1207  if (DataInfo().GetClassInfo("Signal") != 0) {
1208  signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1209  }
1210  gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
1211  meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1212 
 // Clip the histogram range to +-nrms RMS around the class means (within data range).
1214  xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
1215  xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;
1216 
1217  // calculate ROC integral
1218  TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
1219  TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
1220  TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1221  if (CalcOverlapIntergral) {
1222  mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
1223  mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
1224  }
1225  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1226  const Event* ev = GetEvent(ievt);
 // ROC on the training sample uses ORIGINAL weights (pre-boost), testing uses
 // the current weights; the overlap histograms always use current weights.
1227  Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
1228  if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
1229  else mva_b->Fill( (*mvaRes)[ievt], w );
1230 
1231  if (CalcOverlapIntergral) {
1232  Float_t w_ov = ev->GetWeight();
1233  if (DataInfo().IsSignal(ev))
1234  mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
1235  else
1236  mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1237  }
1238  }
1239  gTools().NormHist( mva_s );
1240  gTools().NormHist( mva_b );
 // Smooth the normalized distributions with spline-2 PDFs for the ROC integral.
1241  PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
1242  PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );
1243 
1244  // calculate ROC integral from fS, fB
1245  Double_t ROC = MethodBase::GetROCIntegral(fS, fB);
1246 
1247  // calculate overlap integral
1248  if (CalcOverlapIntergral) {
1249  gTools().NormHist( mva_s_overlap );
1250  gTools().NormHist( mva_b_overlap );
1251 
 // Overlap = sum over bins of min(sig, bkg) of the normalized distributions.
1252  fOverlap_integral = 0.0;
1253  for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
1254  Double_t bc_s = mva_s_overlap->GetBinContent(bin);
1255  Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1256  if (bc_s > 0.0 && bc_b > 0.0)
1257  fOverlap_integral += TMath::Min(bc_s, bc_b);
1258  }
1259 
1260  delete mva_s_overlap;
1261  delete mva_b_overlap;
1262  }
1263 
1264  delete mva_s;
1265  delete mva_b;
1266  delete fS;
1267  delete fB;
 // Only delete mvaRes when it was allocated above (not the cached fMVAvalues).
1268  if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;
1269 
1271 
1272  return ROC;
1273 }
1274 
// Cache the MVA response of the current (last booked) method for every training
// event into fMVAvalues, so later boosting steps can reuse them.
// NOTE(review): the extraction dropped the signature line (orig. 1275) and line
// 1280 — the latter presumably selects the training sample via
// Data()->SetCurrentType(Types::kTraining); confirm against the original file.
1276 {
1277  // Calculate MVA values of current method fMethods.back() on
1278  // training sample
1279 
1281  MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
1282  if (!method) {
1283  Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
1284  return;
1285  }
1286  // calculate MVA values
 // GetEvent(ievt) sets the current event; GetMvaValue() then evaluates it.
1287  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1288  GetEvent(ievt);
1289  fMVAvalues->at(ievt) = method->GetMvaValue();
1290  }
1291 
1292  // fill cumulative mva distribution
1293 
1294 
1295 }
1296 
1297 
1298 ////////////////////////////////////////////////////////////////////////////////
1299 /// fill various monitoring histograms from information of the individual classifiers that
1300 /// have been boosted.
1301 /// of course.... this depends very much on the individual classifiers, and so far, only for
1302 /// Decision Trees, this monitoring is actually implemented
1303 
// Fill monitoring histograms for the individual boosted classifiers at the given
// boosting stage; detailed monitoring is only implemented for Decision Trees,
// plus an optional universal 2-variable event-distribution display.
// NOTE(review): the extraction dropped the signature line (orig. 1304; per the
// class index: void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0))
// and orig. 1306 — presumably the declaration of `results`, which is used below
// but never declared in the visible lines; confirm. Further gaps at orig. 1335
// (Fisher kAfterBoosting body) and 1357.
1305 {
1307 
1308  if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
1309  TMVA::MethodDT* currentDT=dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
1310  if (currentDT){
1311  if (stage == Types::kBoostProcBegin){
 // One bin per boosting round, booked once at the start of the boost process.
1312  results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
1313  results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
1314  }
1315 
1316  if (stage == Types::kBeforeTraining){
1317  }
1318  else if (stage == Types::kBeforeBoosting){
1319  results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
1320  results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
1321  }
1322  else if (stage == Types::kAfterBoosting){
1323 
1324  }
 // Any remaining stage except kBoostProcEnd: report the pruning summary.
1325  else if (stage != Types::kBoostProcEnd){
1326  Log() << kINFO << "<Train> average number of nodes before/after pruning : "
1327  << results->GetHist("NodesBeforePruning")->GetMean() << " / "
1328  << results->GetHist("NodesAfterPruning")->GetMean()
1329  << Endl;
1330  }
1331  }
1332 
1333  }else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
1334  if (stage == Types::kAfterBoosting){
1336  }
1337  }else{
 // Only warn for the first few methods to avoid log spam.
1338  if (methodIndex < 3){
1339  Log() << kDEBUG << "No detailed boost monitoring for "
1340  << GetCurrentMethod(methodIndex)->GetMethodName()
1341  << " yet available " << Endl;
1342  }
1343  }
1344 
1345  //boosting plots universal for all classifiers 'typically for debug purposes only as they are not general enough'
1346 
1347  if (stage == Types::kBeforeBoosting){
1348  // if you want to display the weighted events for 2D case at each boost step:
1349  if (fDetailedMonitoring){
1350  // the following code is useful only for 2D examples - mainly illustration for debug/educational purposes:
1351  if (DataInfo().GetNVariables() == 2) {
1352  results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1353  results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
1354  results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1355  results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);
1356 
1358  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1359  const Event* ev = GetEvent(ievt);
1360  Float_t w = ev->GetWeight();
1361  Float_t v0= ev->GetValue(0);
1362  Float_t v1= ev->GetValue(1);
1363  // if (ievt<3) std::cout<<ievt<<" var0="<<v0<<" var1="<<v1<<" weight="<<w<<std::endl;
1364  TH2* h;
1365  if (DataInfo().IsSignal(ev)) h=results->GetHist2D(Form("EventDistSig_%d",methodIndex));
1366  else h=results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
1367  if (h) h->Fill(v0,v1,w);
1368  }
1369  }
1370  }
1371  }
1372 
1373  return;
1374 }
1375 
1376 
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
Config & gConfig()
Definition: Config.cxx:43
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:378
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3127
void SetMsgType(EMsgType t)
Definition: Configurable.h:131
static long int sum(long int i)
Definition: Factory.cxx:1785
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
float xmin
Definition: THbookFile.cxx:93
Random number generator class based on M.
Definition: TRandom3.h:29
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:190
THist< 1, int, THistStatContent > TH1I
Definition: THist.hxx:304
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
Definition: TRandom.cxx:414
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:127
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4640
long long Long64_t
Definition: RtypesCore.h:69
Double_t fROC_training
Definition: MethodBoost.h:184
#define REGISTER_METHOD(CLASS)
for example
void SingleTrain()
initialization
std::vector< TH1 * > fTestSigMVAHist
Definition: MethodBoost.h:175
Stat_t GetSum() const
Definition: TArrayD.h:48
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t Log(Double_t x)
Definition: TMath.h:526
Ssiz_t Length() const
Definition: TString.h:390
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
float Float_t
Definition: RtypesCore.h:53
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
const char * GetName() const
Definition: MethodBase.h:330
static Types & Instance()
the the single instance of "Types" if existin already, or create it (Signleton)
Definition: Types.cxx:64
Int_t GetBoostNum()
Definition: MethodBoost.h:87
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4374
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:302
TH1 * h
Definition: legend2.C:5
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:156
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
DataSet * Data() const
Definition: MethodBase.h:405
EAnalysisType
Definition: Types.h:128
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:90
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:186
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:361
void SetBoostWeight(Double_t w) const
Definition: Event.h:113
Basic string class.
Definition: TString.h:137
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1089
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory and return a pointer to the created directory.
Definition: TDirectory.cxx:957
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
std::vector< TH1 * > fTrainBgdMVAHist
Definition: MethodBoost.h:170
const Ranking * CreateRanking()
virtual Int_t GetNbinsX() const
Definition: TH1.h:301
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:374
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
TString GetElapsedTime(Bool_t Scientific=kTRUE)
Definition: Timer.cxx:129
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:837
const TString & GetMethodName() const
Definition: MethodBase.h:327
void SetMethodDir(TDirectory *methodDir)
Definition: MethodBase.h:368
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:233
Double_t fOverlap_integral
Definition: MethodBoost.h:188
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:85
const char * Data() const
Definition: TString.h:349
TH2 * GetHist2D(const TString &alias) const
Definition: Results.cxx:136
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:433
static void InhibitOutput()
Definition: MsgLogger.cxx:69
Tools & gTools()
Definition: Tools.cxx:79
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
TStopwatch timer
Definition: pirndm.C:37
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8219
void AddEvent(Double_t val, Double_t weight, Int_t type)
Bool_t IsSignal(const Event *ev) const
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
std::vector< Double_t > fMethodWeight
virtual ~MethodBoost(void)
destructor
TString fWeightFileDir
Definition: Config.h:100
void ScaleBoostWeight(Double_t s) const
Definition: Event.h:114
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:43
Int_t GetNNodes()
Definition: MethodDT.h:109
IONames & GetIONames()
Definition: Config.h:78
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
virtual void ParseOptions()
options parser
void GetHelpMessage() const
Get help message text.
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:403
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:413
Types::EMVA GetMethodType() const
Definition: MethodBase.h:329
Definition: PDF.h:71
Double_t GetOriginalWeight() const
Definition: Event.h:87
UInt_t GetNVariables() const
Definition: MethodBase.h:341
virtual void Delete(Option_t *option="")
Delete this object.
Definition: TObject.cxx:229
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9001
TString fHistoricOption
Definition: MethodBoost.h:196
RooCmdArg Timer(Bool_t flag=kTRUE)
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6717
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
Definition: DataSet.cxx:286
Service class for 2-Dim histogram classes.
Definition: TH2.h:36
SVector< double, 2 > v
Definition: Dict.h:5
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file dummy implementation here --------------— ...
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:160
ClassInfo * GetClassInfo(Int_t clNum) const
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8280
TString fTransformString
Definition: MethodBoost.h:155
unsigned int UInt_t
Definition: RtypesCore.h:42
TMarker * m
Definition: textangle.C:8
const Event * GetEvent() const
Definition: MethodBase.h:745
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
TAxis * GetYaxis()
Definition: TH1.h:325
float xmax
Definition: THbookFile.cxx:93
tomato 1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:618
virtual TDirectory * GetDirectory(const char *apath, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory named "apath".
Bool_t IsSilentFile()
Definition: MethodBase.h:375
REAL epsilon
Definition: triangle.c:617
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:452
const Event * GetEvent() const
Definition: DataSet.cxx:211
virtual Double_t GetSeparationGain(const Double_t &nSelS, const Double_t &nSelB, const Double_t &nTotS, const Double_t &nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
TString & Remove(Ssiz_t pos)
Definition: TString.h:616
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:114
int Ssiz_t
Definition: RtypesCore.h:63
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:130
TFile * GetFile() const
Definition: MethodBase.h:366
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
TString fBoostedMethodOptions
Definition: MethodBoost.h:164
Double_t Exp(Double_t x)
Definition: TMath.h:495
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:430
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:166
#define ClassImp(name)
Definition: Rtypes.h:279
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
Definition: MethodBase.h:399
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:44
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
std::vector< TH1 * > fTrainSigMVAHist
Definition: MethodBoost.h:169
TString fBoostedMethodTitle
Definition: MethodBoost.h:163
int type
Definition: TGX11.cxx:120
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:229
MsgLogger & Log() const
Definition: Configurable.h:128
The TH1 histogram class.
Definition: TH1.h:80
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
Double_t fMethodError
Definition: MethodBoost.h:182
void AddPreDefVal(const T &)
Definition: Configurable.h:174
UInt_t GetClass() const
Definition: Event.h:89
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:108
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:420
virtual void TestClassification()
initialization
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file dummy implementation here --------------— ...
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1651
TString fBoostedMethodName
Definition: MethodBoost.h:162
std::vector< IMethod * > fMethods
Abstract ClassifierFactory template that handles arbitrary types.
Double_t fAdaBoostBeta
Definition: MethodBoost.h:158
Double_t GetMVAProbAt(Double_t value)
DataSetManager * fDataSetManager
Definition: MethodBoost.h:192
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
const TString & GetJobName() const
Definition: MethodBase.h:326
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:590
virtual Double_t GetSeparationIndex(const Double_t &s, const Double_t &b)=0
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
const TString & GetOptions() const
Definition: Configurable.h:90
const Event * fTmpEvent
Definition: MethodBase.h:408
void SetWeightFileDir(TString fileDir)
set directory of weight file
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:197
void InitHistos()
initialisation routine
#define NULL
Definition: Rtypes.h:82
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:301
A TTree object has a header with a name and a title.
Definition: TTree.h:98
TTree * fMonitorTree
Definition: MethodBoost.h:180
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:356
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:83
std::vector< TH1 * > fBTrainSigMVAHist
Definition: MethodBoost.h:172
static void EnableOutput()
Definition: MsgLogger.cxx:70
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
Double_t fBoostWeight
Definition: MethodBoost.h:181
const Bool_t kTRUE
Definition: Rtypes.h:91
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:155
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:308
std::vector< TH1 * > fBTrainBgdMVAHist
Definition: MethodBoost.h:173
UInt_t GetNumber() const
Definition: ClassInfo.h:73
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:215
virtual void TestClassification()
initialization
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:432
std::vector< TH1 * > fTestBgdMVAHist
Definition: MethodBoost.h:177
TAxis * GetXaxis()
Definition: TH1.h:324
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:819
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:360
Bool_t IsModelPersistence()
Definition: MethodBase.h:379