ROOT 6.08/07 Reference Guide
MethodBoost.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss, Or Cohen, Jan Therhaag, Eckhard von Toerne

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodBoost                                                           *
 * Web    : http://tmva.sourceforge.net                                          *
 *                                                                                *
 * Description:                                                                   *
 *      Virtual base class for all MVA methods                                    *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Andreas Hoecker   <Andreas.Hocker@cern.ch>    - CERN, Switzerland         *
 *      Peter Speckmayer  <Peter.Speckmazer@cern.ch>  - CERN, Switzerland         *
 *      Joerg Stelzer     <Joerg.Stelzer@cern.ch>     - CERN, Switzerland         *
 *      Helge Voss        <Helge.Voss@cern.ch>        - MPI-K Heidelberg, Germany *
 *      Jan Therhaag      <Jan.Therhaag@cern.ch>      - U of Bonn, Germany        *
 *      Eckhard v. Toerne <evt@uni-bonn.de>           - U of Bonn, Germany        *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

//_______________________________________________________________________
//
// This class is meant to boost a single classifier. Boosting means
// training the classifier several times. Each time the weights of the
// events are modified according to how well the classifier performed
// on the training sample.
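//
// A boosted classifier is booked through the Factory by booking the method
// to be boosted and appending the "Boost_*" options to its option string.
// A minimal sketch, along the lines of the TMVA tutorial macros (method,
// title and option values are illustrative only):
//
//    factory->BookMethod( TMVA::Types::kFisher, "BoostedFisher",
//                         "H:!V:Boost_Num=20:Boost_Type=AdaBoost:"
//                         "Boost_Transform=step:Boost_AdaBoostBeta=1.0" );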
////////////////////////////////////////////////////////////////////////////////

#include "TMVA/MethodBoost.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/Configurable.h"
#include "TMVA/DataSet.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/MethodCategory.h"
#include "TMVA/MethodCompositeBase.h"
#include "TMVA/MethodDT.h"
#include "TMVA/MethodFisher.h"
#include "TMVA/PDF.h"
#include "TMVA/Results.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"

#include "TMVA/SeparationBase.h"
#include "TMVA/QuickMVAProbEstimator.h"
#include "TMVA/GiniIndex.h"
#include "TMVA/CrossEntropy.h"

#include "Riostream.h"
#include "TRandom3.h"
#include "TFile.h"
#include "TMath.h"
#include "TObjString.h"
#include "TH1F.h"
#include "TH2F.h"
#include "TGraph.h"
#include "TSpline.h"
#include "TDirectory.h"
#include "TTree.h"

#include <algorithm>
#include <iomanip>
#include <vector>
#include <cmath>


REGISTER_METHOD(Boost)
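// REGISTER_METHOD(Boost) above makes this class known to TMVA's
// ClassifierFactory under the type name "Boost", so the Factory can
// instantiate it from the method name alone.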

ClassImp(TMVA::MethodBoost)

////////////////////////////////////////////////////////////////////////////////

TMVA::MethodBoost::MethodBoost( const TString& jobName,
                                const TString& methodTitle,
                                DataSetInfo& theData,
                                const TString& theOption ) :
   TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption)
   , fBoostNum(0)
   , fDetailedMonitoring(kFALSE)
   , fAdaBoostBeta(0)
   , fRandomSeed(0)
   , fBaggedSampleFraction(0)
   , fBoostedMethodTitle(methodTitle)
   , fBoostedMethodOptions(theOption)
   , fMonitorBoostedMethod(kFALSE)
   , fMonitorTree(0)
   , fBoostWeight(0)
   , fMethodError(0)
   , fROC_training(0.0)
   , fOverlap_integral(0.0)
   , fMVAvalues(0)
{
   fMVAvalues = new std::vector<Float_t>;
}

////////////////////////////////////////////////////////////////////////////////

TMVA::MethodBoost::MethodBoost( DataSetInfo& dsi,
                                const TString& theWeightFile)
   : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
   , fBoostNum(0)
   , fDetailedMonitoring(kFALSE)
   , fAdaBoostBeta(0)
   , fRandomSeed(0)
   , fBaggedSampleFraction(0)
   , fBoostedMethodTitle("")
   , fBoostedMethodOptions("")
   , fMonitorBoostedMethod(kFALSE)
   , fMonitorTree(0)
   , fBoostWeight(0)
   , fMethodError(0)
   , fROC_training(0.0)
   , fOverlap_integral(0.0)
   , fMVAvalues(0)
{
   fMVAvalues = new std::vector<Float_t>;
}

////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::MethodBoost::~MethodBoost( void )
{
   fMethodWeight.clear();

   // the histograms themselves are deleted when the file is closed

   fTrainSigMVAHist.clear();
   fTrainBgdMVAHist.clear();
   fBTrainSigMVAHist.clear();
   fBTrainBgdMVAHist.clear();
   fTestSigMVAHist.clear();
   fTestBgdMVAHist.clear();

   if (fMVAvalues) {
      delete fMVAvalues;
      fMVAvalues = 0;
   }
}


////////////////////////////////////////////////////////////////////////////////
/// Boost can handle classification with 2 classes and regression with one regression-target

Bool_t TMVA::MethodBoost::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   //   if (type == Types::kRegression && numberTargets == 1) return kTRUE;
   return kFALSE;
}


////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::DeclareOptions()
{
   DeclareOptionRef( fBoostNum = 1, "Boost_Num",
                     "Number of times the classifier is boosted" );

   DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
                     "Write monitoring histograms for each boosted classifier" );

   DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
                     "Produce histograms for detailed boost monitoring" );

   DeclareOptionRef( fBoostType  = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
   AddPreDefVal(TString("RealAdaBoost"));
   AddPreDefVal(TString("AdaBoost"));
   AddPreDefVal(TString("Bagging"));

   DeclareOptionRef(fBaggedSampleFraction=.6,"Boost_BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );

   DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
                     "The AdaBoost parameter that sets the effect of every boost step on the events' weights" );

   DeclareOptionRef( fTransformString = "step", "Boost_Transform",
                     "Type of transform applied to every boosted method: linear, log, step" );
   AddPreDefVal(TString("step"));
   AddPreDefVal(TString("linear"));
   AddPreDefVal(TString("log"));
   AddPreDefVal(TString("gauss"));

   DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
                     "Seed for random number generator used for bagging" );

}

////////////////////////////////////////////////////////////////////////////////
/// options that are used ONLY for the READER to ensure backward compatibility
/// they are hence without any effect (the reader is only reading the training
/// options that HAD been used at the training of the .xml weight file at hand)

void TMVA::MethodBoost::DeclareCompatibilityOptions()
{
   MethodBase::DeclareCompatibilityOptions();

   DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
                     "How to set the final weight of the boosted classifiers" );
   AddPreDefVal(TString("ByError"));
   AddPreDefVal(TString("Average"));
   AddPreDefVal(TString("ByROC"));
   AddPreDefVal(TString("ByOverlap"));
   AddPreDefVal(TString("LastMethod"));

   DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
                     "Type of transform applied to every boosted method: linear, log, step" );
   AddPreDefVal(TString("step"));
   AddPreDefVal(TString("linear"));
   AddPreDefVal(TString("log"));
   AddPreDefVal(TString("gauss"));

   // this option
   //DeclareOptionRef( fBoostType  = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
   // still exists, but these two possible values
   AddPreDefVal(TString("HighEdgeGauss"));
   AddPreDefVal(TString("HighEdgeCoPara"));
   // have been deleted .. hope that works :)

   DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
                     "Recalculate the classifier MVA Signallike cut at every boost iteration" );

}
////////////////////////////////////////////////////////////////////////////////
/// just registering the string from which the boosted classifier will be created

Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
{
   fBoostedMethodName    = Types::Instance().GetMethodName( theMethod );
   fBoostedMethodTitle   = methodTitle;
   fBoostedMethodOptions = theOption;
   TString opts=theOption;
   opts.ToLower();
   // if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl;

   return kTRUE;
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::Init()
{
}

////////////////////////////////////////////////////////////////////////////////
/// initialisation routine

void TMVA::MethodBoost::InitHistos()
{

   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
   results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
   results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
   if (fDetailedMonitoring){
      results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
      results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
      results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
      results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
      results->Store(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
   }


   results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
   results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
   results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
   if (fDetailedMonitoring){
      results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
      results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
      results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
      results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
      results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
      results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
      results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
   }

   results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
   results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
   results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");

   results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
   results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
   results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");



   fMonitorTree= new TTree("MonitorBoost","Boost variables");
   fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
   fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
   fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");

}


////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::CheckSetup()
{
   Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType << Endl;
   Log() << kDEBUG << "CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<Endl;
   Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
   Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum << Endl;
   Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
   Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
   Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
   Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
   Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
   Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx << Endl;
   if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
   Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
   if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
   Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;

}
////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::Train()
{
   TDirectory* methodDir( 0 );
   TString     dirName,dirTitle;
   Int_t       StopCounter=0;
   Results*    results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());


   InitHistos();

   if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;

   if (fMethods.size() > 0) fMethods.clear();
   fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);

   Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
   Timer timer( fBoostNum, GetName() );

   ResetBoostWeights();

   // clean boosted method options
   CleanBoostOptions();


   // remove transformations for individual boosting steps
   // the transformation of the main method will be rerouted to each of the boost steps
   Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform=");
   if (varTrafoStart >0) {
      Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart);
      if (varTrafoEnd<varTrafoStart)
         varTrafoEnd=fBoostedMethodOptions.Length();
      fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
   }
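   // e.g. an (illustrative) option string
   //    "!H:!V:~VarTransform=D,G:NCycles=500"
   // becomes "!H:!V::NCycles=500" here: the "~VarTransform=..." token is cut
   // out, and the main method's transformation is rerouted to each boost step
   // instead (see RerouteTransformationHandler below).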

   //
   // training and boosting the classifiers
   for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
      // the first classifier shows the option string output, the rest not
      if (fCurrentMethodIdx>0) TMVA::MsgLogger::InhibitOutput();

      IMethod* method = ClassifierFactory::Instance().Create(std::string(fBoostedMethodName),
                                                             GetJobName(),
                                                             Form("%s_B%04i", fBoostedMethodTitle.Data(), fCurrentMethodIdx),
                                                             DataInfo(),
                                                             fBoostedMethodOptions);
      TMVA::MsgLogger::EnableOutput();

      // suppressing the rest of the classifier output the right way
      fCurrentMethod  = (dynamic_cast<MethodBase*>(method));

      if (fCurrentMethod==0) {
         Log() << kFATAL << "uups.. guess the booking of the " << fCurrentMethodIdx << "-th classifier somehow failed" << Endl;
         return; // hope that makes coverity happy (as it fears I might use the pointer later on, not knowing that FATAL exits)
      }

      // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
      if (fCurrentMethod->GetMethodType() == Types::kCategory) { // DSMTEST
         MethodCategory *methCat = (dynamic_cast<MethodCategory*>(fCurrentMethod)); // DSMTEST
         if (!methCat) // DSMTEST
            Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl; // DSMTEST
         methCat->fDataSetManager = fDataSetManager; // DSMTEST
      } // DSMTEST

      fCurrentMethod->SetupMethod();
      fCurrentMethod->ParseOptions();
      // put SetAnalysisType here for the needs of MLP
      fCurrentMethod->SetAnalysisType( GetAnalysisType() );
      fCurrentMethod->ProcessSetup();
      fCurrentMethod->CheckSetup();


      // reroute transformationhandler
      fCurrentMethod->RerouteTransformationHandler(&(this->GetTransformationHandler()));


      // creating the directory of the classifier
      if(!IsSilentFile())
      {
         if (fMonitorBoostedMethod) {
            methodDir=GetFile()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
            if (methodDir==0) {
               methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
            }
            fCurrentMethod->SetMethodDir(methodDir);
            fCurrentMethod->BaseDir()->cd();
         }
      }

      // training
      TMVA::MethodCompositeBase::fMethods.push_back(method);
      timer.DrawProgressBar( fCurrentMethodIdx );
      if (fCurrentMethodIdx==0) MonitorBoost(Types::kBoostProcBegin,fCurrentMethodIdx);
      MonitorBoost(Types::kBeforeTraining,fCurrentMethodIdx);
      TMVA::MsgLogger::InhibitOutput(); // suppressing Logger outside the method
      if (fBoostType=="Bagging") Bagging(); // you want to train the first classifier on a bagged sample as well
      SingleTrain();
      TMVA::MsgLogger::EnableOutput();

      // calculate MVA values of current method for all events in training sample
      // (used later on to get 'misclassified events' etc for the boosting)
      CalcMVAValues();

      if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();

      // get ROC integral and overlap integral for single method on
      // training sample if fMethodWeightType == "ByROC" or the user
      // wants detailed monitoring

      // boosting (reweight training sample)
      MonitorBoost(Types::kBeforeBoosting,fCurrentMethodIdx);
      SingleBoost(fCurrentMethod);

      MonitorBoost(Types::kAfterBoosting,fCurrentMethodIdx);
      results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1,fBoostWeight);
      results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1,fMethodError);

      if (fDetailedMonitoring) {
         fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining);
         results->GetHist("ROCIntegral_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
         results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
         results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
         results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
         results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
      }



      fMonitorTree->Fill();

      // stop boosting if needed when error has reached 0.5
      // thought of counting a few steps, but it doesn't seem to be necessary
      Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
      if (fMethodError > 0.49999) StopCounter++;
      if (StopCounter > 0 && fBoostType != "Bagging") {
         timer.DrawProgressBar( fBoostNum );
         fBoostNum = fCurrentMethodIdx+1;
         Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
         if (fBoostNum < 5)
            Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fAdaBoostBeta << ", try reducing it." <<Endl;
         break;
      }
   }

   // as MethodBoost acts not on a private event sample (like MethodBDT does), we need to remember not
   // to leave "boosted" events to the next classifier in the factory

   ResetBoostWeights();

   Timer* timer1= new Timer( fBoostNum, GetName() );
   // normalizing the weights of the classifiers
   for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
      // performing post-boosting actions

      if (fCurrentMethodIdx==fBoostNum) {
         Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
               << "                              " << Endl;
      }

      TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
      if (tmp) tmp->SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);

   }

   // Ensure that in case of only 1 boost the method weight equals
   // 1.0.  This avoids unexpected behaviour in case of very bad
   // classifiers which have fBoostWeight=1 or fMethodError=0.5,
   // because their weight would be set to zero.  This behaviour is
   // not ok if one boosts just one time.
   if (fMethods.size()==1)  fMethodWeight[0] = 1.0;

   MonitorBoost(Types::kBoostProcEnd);

   delete timer1;
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::CleanBoostOptions()
{
   fBoostedMethodOptions=GetOptions();
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::CreateMVAHistorgrams()
{
   if (fBoostNum <=0) Log() << kFATAL << "CreateHistorgrams called before fBoostNum is initialized" << Endl;
   // calculating histogram boundaries and creating histograms..
   // nrms = number of rms around the average to use for outline (of the 0 classifier)
   Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );

   fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
   xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;

   // creating all the histograms
   for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
      fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S",        fNbins, xmin, xmax ) );
      fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B",        fNbins, xmin, xmax ) );
      fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
      fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
      fTestSigMVAHist  .push_back( new TH1F( Form("MVA_Test_S%04i",  imtd), "MVA_Test_S",         fNbins, xmin, xmax ) );
      fTestBgdMVAHist  .push_back( new TH1F( Form("MVA_Test_B%04i",  imtd), "MVA_Test_B",         fNbins, xmin, xmax ) );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// resetting back the boosted weights of the events to 1

void TMVA::MethodBoost::ResetBoostWeights()
{
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event *ev = Data()->GetEvent(ievt);
      ev->SetBoostWeight( 1.0 );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// write special monitoring histograms to file

void TMVA::MethodBoost::WriteMonitoringHistosToFile( void ) const
{
   TDirectory* dir=0;
   if (fMonitorBoostedMethod) {
      for (UInt_t imtd=0;imtd<fBoostNum;imtd++) {

         //writing the histograms in the specific classifier's directory
         MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
         if (!m) continue;
         dir = m->BaseDir();
         dir->cd();
         fTrainSigMVAHist[imtd]->SetDirectory(dir);
         fTrainSigMVAHist[imtd]->Write();
         fTrainBgdMVAHist[imtd]->SetDirectory(dir);
         fTrainBgdMVAHist[imtd]->Write();
         fBTrainSigMVAHist[imtd]->SetDirectory(dir);
         fBTrainSigMVAHist[imtd]->Write();
         fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
         fBTrainBgdMVAHist[imtd]->Write();
      }
   }

   // going back to the original folder
   BaseDir()->cd();

   fMonitorTree->Write();
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::TestClassification()
{
   MethodBase::TestClassification();
   if (fMonitorBoostedMethod) {
      UInt_t nloop = fTestSigMVAHist.size();
      if (fMethods.size()<nloop) nloop = fMethods.size();
      //running over all the events and populating the test MVA histograms
      Data()->SetCurrentType(Types::kTesting);
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = GetEvent(ievt);
         Float_t w = ev->GetWeight();
         if (DataInfo().IsSignal(ev)) {
            for (UInt_t imtd=0; imtd<nloop; imtd++) {
               fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
            }
         }
         else {
            for (UInt_t imtd=0; imtd<nloop; imtd++) {
               fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
            }
         }
      }
      Data()->SetCurrentType(Types::kTraining);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// writes all MVA evaluation histograms to file

void TMVA::MethodBoost::WriteEvaluationHistosToFile(Types::ETreeType treetype)
{
   MethodBase::WriteEvaluationHistosToFile(treetype);
   if (treetype==Types::kTraining) return;
   UInt_t nloop = fTestSigMVAHist.size();
   if (fMethods.size()<nloop) nloop = fMethods.size();
   if (fMonitorBoostedMethod) {
      TDirectory* dir=0;
      for (UInt_t imtd=0;imtd<nloop;imtd++) {
         //writing the histograms in the specific classifier's directory
         MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
         if (!mva) continue;
         dir = mva->BaseDir();
         if (dir==0) continue;
         dir->cd();
         fTestSigMVAHist[imtd]->SetDirectory(dir);
         fTestSigMVAHist[imtd]->Write();
         fTestBgdMVAHist[imtd]->SetDirectory(dir);
         fTestBgdMVAHist[imtd]->Write();
      }
   }
}

////////////////////////////////////////////////////////////////////////////////
/// process user options

void TMVA::MethodBoost::ProcessOptions()
{
}

////////////////////////////////////////////////////////////////////////////////
/// train the current single classifier

void TMVA::MethodBoost::SingleTrain()
{
   Data()->SetCurrentType(Types::kTraining);
   MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
   if (meth){
      meth->SetSilentFile(IsSilentFile());
      if(IsModelPersistence()){
         TString _fFileDir= DataInfo().GetName();
         _fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
         meth->SetWeightFileDir(_fFileDir);
      }
      meth->SetModelPersistence(IsModelPersistence());
      meth->TrainMethod();
   }
}

////////////////////////////////////////////////////////////////////////////////
/// find the CUT on the individual MVA that defines an event as
/// correct or misclassified (to be used in the boosting process)
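/// (Sketch of the procedure implemented below: fill finely binned signal and
/// background histograms of the MVA output, build their cumulative
/// distributions, scan all bin edges for the cut value that maximises the
/// separation gain, and set the cut orientation according to which side of
/// the cut is the more signal-like.)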

void TMVA::MethodBoost::FindMVACut(MethodBase *method)
{
   if (!method || method->GetMethodType() == Types::kDT ){ return;}

   // creating a fine histogram containing the error rate
   const Int_t nBins=10001;
   Double_t minMVA=150000;
   Double_t maxMVA=-150000;
   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
      GetEvent(ievt);
      Double_t val=method->GetMvaValue();
      //Helge .. I think one could very well use fMVAValues for that ... -->to do
      if (val>maxMVA) maxMVA=val;
      if (val<minMVA) minMVA=val;
   }
   maxMVA = maxMVA+(maxMVA-minMVA)/nBins;

   Double_t sum = 0.;

   TH1D *mvaS  = new TH1D(Form("MVAS_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
   TH1D *mvaB  = new TH1D(Form("MVAB_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
   TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
   TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);

   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
   if (fDetailedMonitoring){
      results->Store(mvaS, Form("MVAS_%d",fCurrentMethodIdx));
      results->Store(mvaB, Form("MVAB_%d",fCurrentMethodIdx));
      results->Store(mvaSC,Form("MVASC_%d",fCurrentMethodIdx));
      results->Store(mvaBC,Form("MVABC_%d",fCurrentMethodIdx));
   }

   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {

      Double_t weight = GetEvent(ievt)->GetWeight();
      Double_t mvaVal=method->GetMvaValue();
      sum +=weight;
      if (DataInfo().IsSignal(GetEvent(ievt))){
         mvaS->Fill(mvaVal,weight);
      }else {
         mvaB->Fill(mvaVal,weight);
      }
   }
   SeparationBase *sepGain;


   // Boosting should use MisClassificationError, not the Gini index (changed, Helge 31.5.2013)
   // NOTE !! with "Misclassification" this ONLY works if the signal/background ratio is
   // readjusted at every boost step. Strangely, however, one gets better results (exactly as
   // already observed for BDTs) when using the Gini index and accepting that every other time
   // NO reasonable cut is found - i.e. the cut then lies outside the MVA value range, all
   // events are classified as background, and the boost algorithm then 'automatically'
   // renormalises things such that the next step again yields something reasonable.
   // Strange .. that THIS should be the right thing to do ??

   // SeparationBase *sepGain2 = new MisClassificationError();
   //sepGain = new MisClassificationError();
   sepGain = new GiniIndex();
   //sepGain = new CrossEntropy();

   Double_t sTot = mvaS->GetSum();
   Double_t bTot = mvaB->GetSum();

   mvaSC->SetBinContent(1,mvaS->GetBinContent(1));
   mvaBC->SetBinContent(1,mvaB->GetBinContent(1));
   Double_t sSel=0;
   Double_t bSel=0;
   Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
   Double_t mvaCut=mvaSC->GetBinLowEdge(1);
   Double_t sSelCut=sSel;
   Double_t bSelCut=bSel;
   // std::cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << std::endl;

   // for (Int_t ibin=1;ibin<=nBins;ibin++) std::cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << std::endl;
   Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal
   for (Int_t ibin=1;ibin<=nBins;ibin++){
      mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
      mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));

      sSel=mvaSC->GetBinContent(ibin);
      bSel=mvaBC->GetBinContent(ibin);

      // if (ibin==nBins){
      //    std::cout << "Last bin s="<< sSel <<" b="<<bSel << " s="<< sTot-sSel <<" b="<<bTot-bSel << std::endl;
      // }

      if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
          // && (mvaSC->GetBinCenter(ibin) >0 || (fCurrentMethodIdx+1)%2 )
          ){
         separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
         // mvaCut=mvaSC->GetBinCenter(ibin);
         mvaCut=mvaSC->GetBinLowEdge(ibin+1);
         // if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1;
         if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
         else                                     mvaCutOrientation=1;
         sSelCut=sSel;
         bSelCut=bSel;
         // std::cout << "new cut at " << mvaCut << "with s="<<sTot-sSel << " b="<<bTot-bSel << std::endl;
      }
      /*
      Double_t ori;
      if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) ori=-1;
      else ori=1;
      std::cout << ibin << " mvacut="<<mvaCut
                << " sTot=" << sTot
                << " bTot=" << bTot
                << " sSel=" << sSel
                << " bSel=" << bSel
                << " s/b(1)=" << sSel/bSel
                << " s/b(2)=" << (sTot-sSel)/(bTot-bSel)
                << " sepGain="<<sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
                << " sepGain2="<<sepGain2->GetSeparationGain(sSel,bSel,sTot,bTot)
                << " " <<ori
                << std::endl;
      */

   }

   if (0){ // disabled debug printout for the chosen cut
      double parentIndex=sepGain->GetSeparationIndex(sTot,bTot);
      double leftIndex  =sepGain->GetSeparationIndex(sSelCut,bSelCut);
      double rightIndex =sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
      std::cout
         << " sTot=" << sTot
         << " bTot=" << bTot
         << " s="<<sSelCut
         << " b="<<bSelCut
         << " s2="<<(sTot-sSelCut)
         << " b2="<<(bTot-bSelCut)
         << " s/b(1)=" << sSelCut/bSelCut
         << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
         << " index before cut=" << parentIndex
         << " after: left=" << leftIndex
         << " after: right=" << rightIndex
         << " sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
         << " sepGain="<<separationGain
         << " sepGain="<<sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
         << " cut=" << mvaCut
         << " idx="<<fCurrentMethodIdx
         << " cutOrientation="<<mvaCutOrientation
         << std::endl;
   }
   method->SetSignalReferenceCut(mvaCut);
   method->SetSignalReferenceCutOrientation(mvaCutOrientation);

   results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1,separationGain);


   Log() << kDEBUG << "(old step) Setting method cut to " <<method->GetSignalReferenceCut()<< Endl;

   if(IsSilentFile())
   {
      mvaS ->Delete();
      mvaB ->Delete();
      mvaSC->Delete();
      mvaBC->Delete();
   }
}

////////////////////////////////////////////////////////////////////////////////

Double_t TMVA::MethodBoost::SingleBoost(MethodBase* method)
{
   Double_t returnVal=-1;


   if      (fBoostType=="AdaBoost")     returnVal = this->AdaBoost  (method,1);
   else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost  (method,0);
   else if (fBoostType=="Bagging")      returnVal = this->Bagging   ();
   else{
      Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
   }
   fMethodWeight.push_back(returnVal);
   return returnVal;
}
////////////////////////////////////////////////////////////////////////////////
/// the standard (discrete or real) AdaBoost algorithm
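///
/// (As implemented below: discrete AdaBoost classifies each training event
/// simply as right or wrong with respect to the current MVA cut and boosts
/// the misclassified ones, while "real" AdaBoost uses a signal probability
/// estimated from the MVA distribution and scales each event by
/// exp(-boostWeight*trueType*mvaProb).)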

Double_t TMVA::MethodBoost::AdaBoost(MethodBase* method, Bool_t discreteAdaBoost)
{
   if (!method) {
      Log() << kWARNING << " AdaBoost called without classifier reference - needed for calculating AdaBoost " << Endl;
      return 0;
   }

   Float_t w,v; Bool_t sig=kTRUE;
   Double_t sumAll=0, sumWrong=0;
   Bool_t* WrongDetection=new Bool_t[GetNEvents()];
   QuickMVAProbEstimator *MVAProb=NULL;

   if (discreteAdaBoost) {
      FindMVACut(method);
      Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
   } else {
      MVAProb=new TMVA::QuickMVAProbEstimator();
      // the RealAdaBoost does not use a simple "yes (signal)" or "no (background)"
      // answer from your single MVA, but a "signal probability" instead (in the BDT case,
      // that would be the 'purity' in the leaf node). For some MLP parameters, the MVA
      // output can also be interpreted as a probability, but here I try a general approach
      // to get this probability from the MVA distributions...

      for (Long64_t evt=0; evt<GetNEvents(); evt++) {
         const Event* ev =  Data()->GetEvent(evt);
         MVAProb->AddEvent(fMVAvalues->at(evt),ev->GetWeight(),ev->GetClass());
      }
   }


   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;

   // finding the wrong events and calculating their total weights
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      sig=DataInfo().IsSignal(ev);
      v = fMVAvalues->at(ievt);
      w = ev->GetWeight();
      sumAll += w;
      if(!IsSilentFile())
      {
         if (fMonitorBoostedMethod) {
            if (sig) {
               fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
               fTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
            }
            else {
               fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
               fTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
            }
         }
      }

      if (discreteAdaBoost){
         if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
            WrongDetection[ievt]=kFALSE;
         }else{
            WrongDetection[ievt]=kTRUE;
            sumWrong+=w;
         }
      }else{
         Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
         mvaProb = 2*(mvaProb-0.5);
         Int_t    trueType;
         if (DataInfo().IsSignal(ev)) trueType = 1;
         else trueType = -1;
         sumWrong+= w*trueType*mvaProb;
      }
   }

   fMethodError=sumWrong/sumAll;

   // calculating the fMethodError and the boostWeight out of it uses the formula
   // w = ((1-err)/err)^beta
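   // (worked example: an error fraction err=0.25 with beta=1 gives
   //  boostWeight = ln((1-0.25)/0.25) = ln 3 ~ 1.1, i.e. each misclassified
   //  event is scaled by ((1-err)/err)^beta = 3 before the renormalisation
   //  of the event weights below)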

   Double_t boostWeight=0;

   if (fMethodError == 0) { //no misclassification made.. perfect, no boost ;)
      Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
   }else{

      if (discreteAdaBoost)
         boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
      else
         boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;


      // std::cout << "boostweight = " << boostWeight << std::endl;

      // AdaBoost: rescale the weight of the wrongly classified events according to the error
      // level over the entire training sample, rescaling all the weights to have the same sum,
      // but without touching the original weights (changing only the boosted weight of all
      // the events)
      // first reweight

      Double_t newSum=0., oldSum=0.;


      Double_t boostfactor = TMath::Exp(boostWeight);


      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev =  Data()->GetEvent(ievt);
         oldSum += ev->GetWeight();
         if (discreteAdaBoost){
            // events are classified as Signal OR background .. right or wrong
            if (WrongDetection[ievt] && boostWeight != 0) {
               if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
               else                     ev->ScaleBoostWeight(1./boostfactor);
            }
            // if (ievt<30) std::cout<<ievt<<" var0="<<ev->GetValue(0)<<" var1="<<ev->GetValue(1)<<" weight="<<ev->GetWeight() << " boostby:"<<boostfactor<<std::endl;

         }else{
            // events are classified by their probability of being signal or background
            // (eventually one should re-use the MVA values that were already calculated
            // and stored .. however, for the moment ..)
            Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
            mvaProb = 2*(mvaProb-0.5);
            // mvaProb = (1-mvaProb);

            Int_t    trueType=1;
            if (DataInfo().IsSignal(ev)) trueType = 1;
            else trueType = -1;

            boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
            if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
            else                     ev->ScaleBoostWeight(1./boostfactor);

         }
         newSum += ev->GetWeight();
      }

      Double_t normWeight = oldSum/newSum;
      // next normalize the weights
      Double_t normSig=0, normBkg=0;
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = Data()->GetEvent(ievt);
         ev->ScaleBoostWeight(normWeight);
         if (ev->GetClass()) normSig+=ev->GetWeight();
         else                normBkg+=ev->GetWeight();
      }

      Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
      results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);

      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = Data()->GetEvent(ievt);

         if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
         else                ev->ScaleBoostWeight(oldSum/normBkg/2);
      }
   }

   delete[] WrongDetection;
   if (MVAProb) delete MVAProb;

   fBoostWeight = boostWeight;  // used ONLY for the monitoring tree

   return boostWeight;
}


////////////////////////////////////////////////////////////////////////////////
/// Bagging or Bootstrap boosting, gives new random poisson weight for every event

Double_t TMVA::MethodBoost::Bagging()
{
   TRandom3 *trandom = new TRandom3(fRandomSeed+fMethods.size());
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = Data()->GetEvent(ievt);
      ev->SetBoostWeight(trandom->PoissonD(fBaggedSampleFraction));
   }
   delete trandom; // the generator is not needed beyond this point (was leaked before)
   fBoostWeight = 1; // used ONLY for the monitoring tree
   return 1.;
}
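// (With the default Boost_BaggedSampleFraction=0.6, each event receives a
// Poisson-distributed boost weight of mean 0.6; this emulates drawing a
// bootstrap sample of roughly 60% of the original sample size, and events
// that draw a weight of 0 are effectively left out of that training round.)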


////////////////////////////////////////////////////////////////////////////////
/// Get help message text
///
/// typical length of text line:
///         "|--------------------------------------------------------------|"

void TMVA::MethodBoost::GetHelpMessage() const
{
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "This method combines several classifiers of one species in a "<<Endl;
   Log() << "single multivariate quantity via the boost algorithm." << Endl;
   Log() << "The output is a weighted sum over all individual classifiers." <<Endl;
   Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified in the previous tree a larger " << Endl;
   Log() << "weight in the training of the following classifier."<<Endl;
   Log() << "Optionally, Bagged boosting can also be applied." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "The most important parameters in the configuration are the "<<Endl;
   Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
   Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
   Log() << "AdaBoosting: The most important parameter in this configuration" <<Endl;
   Log() << "is the beta parameter (Boost_AdaBoostBeta). " << Endl;
   Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
   Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
   Log() << "are available: step, linear, log, and gauss; the default is step."<<Endl;
   Log() <<Endl;
   Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
   Log() << "their performance by boosting them; some even slightly deteriorate"<< Endl;
   Log() << "due to the boosting." <<Endl;
   Log() << "The booking of the boost method is special since it requires"<<Endl;
   Log() << "the booking of the method to be boosted and the boost itself."<<Endl;
   Log() << "This is solved by booking the method to be boosted and adding"<<Endl;
   Log() << "all Boost parameters, which all begin with \"Boost_\", to the"<<Endl;
   Log() << "options string. The factory separates the options and initiates"<<Endl;
   Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
   Log() << "macro \"Boost.C\"." <<Endl;
}

////////////////////////////////////////////////////////////////////////////////

const TMVA::Ranking* TMVA::MethodBoost::CreateRanking()
{
   return 0;
}

////////////////////////////////////////////////////////////////////////////////
/// return boosted MVA response
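///
/// (i.e., as implemented below, the weighted average
///    y(x) = ( sum_i w_i * T(y_i(x)) ) / ( sum_i w_i ),
/// where the w_i are the method weights accumulated during boosting, y_i the
/// individual MVA responses, and T the Boost_Transform mapping: step, linear,
/// log or gauss.)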

Double_t TMVA::MethodBoost::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   Double_t mvaValue = 0;
   Double_t norm = 0;
   Double_t epsilon = TMath::Exp(-1.);
   //Double_t fact    = TMath::Exp(-1.)+TMath::Exp(1.);
   for (UInt_t i=0;i< fMethods.size(); i++){
      MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
      if (m==0) continue;
      Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
      Double_t sigcut = m->GetSignalReferenceCut();

      // "linear" means no transform
      if (fTransformString == "linear"){

      }
      else if (fTransformString == "log"){
         if (val < sigcut) val = sigcut;

         val = TMath::Log((val-sigcut)+epsilon);
      }
      else if (fTransformString == "step" ){
         if (m->IsSignalLike(val)) val = 1.;
         else val = -1.;
      }
      else if (fTransformString == "gauss"){
         val = TMath::Gaus((val-sigcut),1);
      }
      else {
         Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
      }
      mvaValue+=val*fMethodWeight[i];
      norm    +=fMethodWeight[i];
      // std::cout << "mva("<<i<<") = "<<val<<" " << valx<< " " << mvaValue<<" and sigcut="<<sigcut << std::endl;
   }
   mvaValue/=norm;
   // cannot determine error
   NoErrorCalc(err, errUpper);

   return mvaValue;
}

////////////////////////////////////////////////////////////////////////////////
/// Calculate the ROC integral of a single classifier or even the
/// whole boosted classifier. The tree type (training or testing
/// sample) is specified by 'eTT'.
///
/// If tree type kTraining is set, the original training sample is
/// used to compute the ROC integral (original weights).
///
/// - singleMethod - if kTRUE, return ROC integral of single (last
///                  trained) classifier; if kFALSE, return ROC
///                  integral of full classifier
///
/// - eTT - tree type (Types::kTraining / Types::kTesting)
///
/// - CalcOverlapIntergral - if kTRUE, the overlap integral of the
///                          signal/background MVA distributions
///                          is calculated and stored in
///                          'fOverlap_integral'
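///
/// (The overlap integral computed below is sum_bins min(S_bin, B_bin) for the
/// unit-normalised signal and background MVA distributions: 1 for identical
/// and 0 for fully separated distributions.)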

Double_t TMVA::MethodBoost::GetBoostROCIntegral(Bool_t singleMethod, Types::ETreeType eTT, Bool_t CalcOverlapIntergral)
{
   // set data sample training / testing
   Data()->SetCurrentType(eTT);

   MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no protection against a zero-pointer delivered by dynamic_cast
   // to make CoVerity happy (although, OF COURSE, the last method in the committee
   // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
   // will never be "zero" ...
   if (singleMethod && !method) {
      Log() << kFATAL << " What do you do? Your method:"
            << fMethods.back()->GetName()
            << " seems not to be a proper TMVA method"
            << Endl;
      std::exit(1);
   }
   Double_t err = 0.0;

   // temporarily renormalize the method weights in case of evaluation
   // of full classifier.
   // save the old normalization of the methods
   std::vector<Double_t> OldMethodWeight(fMethodWeight);
   if (!singleMethod) {
      // calculate sum of weights of all methods
      Double_t AllMethodsWeight = 0;
      for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
         AllMethodsWeight += fMethodWeight.at(i);
      // normalize the weights of the classifiers
      if (AllMethodsWeight != 0.0) {
         for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
            fMethodWeight[i] /= AllMethodsWeight;
      }
   }

   // calculate MVA values
   Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
   std::vector <Float_t>* mvaRes;
   if (singleMethod && eTT==Types::kTraining)
      mvaRes = fMVAvalues; // values already calculated
   else {
      mvaRes = new std::vector <Float_t>(GetNEvents());
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         GetEvent(ievt);
         (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
      }
   }

   // restore the method weights
   if (!singleMethod)
      fMethodWeight = OldMethodWeight;

   // now create histograms for calculation of the ROC integral
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );

   fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
   xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;

   // calculate ROC integral
   TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
   TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
   TH1 *mva_s_overlap=0, *mva_b_overlap=0;
   if (CalcOverlapIntergral) {
      mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
      mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
   }
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
      if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
      else                         mva_b->Fill( (*mvaRes)[ievt], w );

      if (CalcOverlapIntergral) {
         Float_t w_ov = ev->GetWeight();
         if (DataInfo().IsSignal(ev))
            mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
         else
            mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
      }
   }
   gTools().NormHist( mva_s );
   gTools().NormHist( mva_b );
   PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
   PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );

   // calculate ROC integral from fS, fB
   Double_t ROC = MethodBase::GetROCIntegral(fS, fB);

   // calculate overlap integral
   if (CalcOverlapIntergral) {
      gTools().NormHist( mva_s_overlap );
      gTools().NormHist( mva_b_overlap );

      fOverlap_integral = 0.0;
      for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
         Double_t bc_s = mva_s_overlap->GetBinContent(bin);
         Double_t bc_b = mva_b_overlap->GetBinContent(bin);
         if (bc_s > 0.0 && bc_b > 0.0)
            fOverlap_integral += TMath::Min(bc_s, bc_b);
      }

      delete mva_s_overlap;
      delete mva_b_overlap;
   }

   delete mva_s;
   delete mva_b;
   delete fS;
   delete fB;
   if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;

   Data()->SetCurrentType(Types::kTraining);

   return ROC;
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBoost::CalcMVAValues()
{
   // Calculate MVA values of current method fMethods.back() on
   // training sample

   Data()->SetCurrentType(Types::kTraining);
   MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
   if (!method) {
      Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
      return;
   }
   // calculate MVA values
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      GetEvent(ievt);
      fMVAvalues->at(ievt) = method->GetMvaValue();
   }

   // fill cumulative mva distribution


}


////////////////////////////////////////////////////////////////////////////////
/// fill various monitoring histograms from information of the individual classifiers that
/// have been boosted.
/// Of course, this depends very much on the individual classifiers; so far, this
/// monitoring is actually implemented only for Decision Trees.

void TMVA::MethodBoost::MonitorBoost( Types::EBoostStage stage, UInt_t methodIndex )
{
   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
      TMVA::MethodDT* currentDT=dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
      if (currentDT){
         if (stage == Types::kBoostProcBegin){
            results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
            results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
         }

         if (stage == Types::kBeforeTraining){
         }
         else if (stage == Types::kBeforeBoosting){
            results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
            results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
         }
         else if (stage == Types::kAfterBoosting){

         }
         else if (stage == Types::kBoostProcEnd){
            Log() << kINFO << "<Train> average number of nodes before/after pruning : "
                  <<   results->GetHist("NodesBeforePruning")->GetMean() << " / "
                  <<   results->GetHist("NodesAfterPruning")->GetMean()
                  << Endl;
         }
      }

   }else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
      if (stage == Types::kAfterBoosting){
         TMVA::MsgLogger::EnableOutput();
      }
   }else{
      if (methodIndex < 3){
         Log() << kDEBUG << "No detailed boost monitoring for "
               << GetCurrentMethod(methodIndex)->GetMethodName()
               << " yet available " << Endl;
      }
   }

   //boosting plots universal for all classifiers 'typically for debug purposes only as they are not general enough'

   if (stage == Types::kBeforeBoosting){
      // if you want to display the weighted events for 2D case at each boost step:
      if (fDetailedMonitoring){
         // the following code is useful only for 2D examples - mainly illustration for debug/educational purposes:
         if (DataInfo().GetNVariables() == 2) {
            results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
            results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
            results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
            results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);

            for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
               const Event* ev = GetEvent(ievt);
               Float_t w = ev->GetWeight();
               Float_t v0= ev->GetValue(0);
               Float_t v1= ev->GetValue(1);
               // if (ievt<3) std::cout<<ievt<<" var0="<<v0<<" var1="<<v1<<" weight="<<w<<std::endl;
               TH2* h;
               if (DataInfo().IsSignal(ev)) h=results->GetHist2D(Form("EventDistSig_%d",methodIndex));
               else                         h=results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
               if (h) h->Fill(v0,v1,w);
            }
         }
      }
   }

   return;
}
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
Config & gConfig()
Definition: Config.cxx:43
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:378
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3125
void SetMsgType(EMsgType t)
Definition: Configurable.h:131
static long int sum(long int i)
Definition: Factory.cxx:1786
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
float xmin
Definition: THbookFile.cxx:93
Random number generator class based on M.
Definition: TRandom3.h:29
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
std::vector< Float_t > * fMVAvalues
Definition: MethodBoost.h:190
THist< 1, int, THistStatContent > TH1I
Definition: THist.hxx:304
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
Definition: TRandom.cxx:414
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
long long Long64_t
Definition: RtypesCore.h:69
Double_t fROC_training
Definition: MethodBoost.h:184
void SingleTrain()
initialization
Stat_t GetSum() const
Definition: TArrayD.h:48
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:130
std::vector< TH1 *> fTestSigMVAHist
Definition: MethodBoost.h:175
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
Double_t Log(Double_t x)
Definition: TMath.h:526
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
float Float_t
Definition: RtypesCore.h:53
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file dummy implementation here --------------— ...
static Types & Instance()
the the single instance of "Types" if existin already, or create it (Signleton)
Definition: Types.cxx:64
Int_t GetBoostNum()
Definition: MethodBoost.h:87
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4375
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:302
TH1 * h
Definition: legend2.C:5
MsgLogger & Log() const
Definition: Configurable.h:128
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
Bool_t fDetailedMonitoring
Definition: MethodBoost.h:156
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4638
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
EAnalysisType
Definition: Types.h:129
MethodBoost(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
Definition: MethodBoost.cxx:90
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:186
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:361
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6760
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
Basic string class.
Definition: TString.h:137
tomato 1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:575
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1089
int Int_t
Definition: RtypesCore.h:41
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory and return a pointer to the created directory.
Definition: TDirectory.cxx:957
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
std::vector< TH1 *> fTrainBgdMVAHist
Definition: MethodBoost.h:170
const Ranking * CreateRanking()
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8262
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:374
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
TString GetElapsedTime(Bool_t Scientific=kTRUE)
Definition: Timer.cxx:129
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification ...
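A minimal sketch of such a ROC integral from two equally binned MVA-response histograms (illustrative only, not TMVA's exact implementation):

#include "TH1.h"

double SimpleROCIntegral(const TH1* histS, const TH1* histB)
{
   const int n = histS->GetNbinsX();
   const double sumS = histS->Integral(1, n);
   const double sumB = histB->Integral(1, n);
   if (sumS <= 0. || sumB <= 0.) return 0.;
   double roc = 0., prevEffS = 1., prevRejB = 0.;
   for (int i = 1; i <= n; ++i) {
      // signal efficiency and background rejection for a cut at bin i
      const double effS = histS->Integral(i, n) / sumS;
      const double rejB = 1. - histB->Integral(i, n) / sumB;
      roc += 0.5 * (prevEffS - effS) * (prevRejB + rejB); // trapezoid rule
      prevEffS = effS;
      prevRejB = rejB;
   }
   roc += 0.5 * prevEffS * (prevRejB + 1.); // close the curve at (0, 1)
   return roc;
}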
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:837
void SetMethodDir(TDirectory *methodDir)
Definition: MethodBase.h:368
Double_t fOverlap_integral
Definition: MethodBoost.h:188
static void InhibitOutput()
Definition: MsgLogger.cxx:69
Tools & gTools()
Definition: Tools.cxx:79
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in the boosting process)
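The underlying idea can be sketched like this (FindBestCut is a hypothetical helper; assumes signal accumulates at high MVA values and equally binned histograms):

#include "TH1.h"

// Scan all bin edges and keep the cut with the smallest weighted
// misclassification: signal below the cut plus background above it.
double FindBestCut(const TH1* sig, const TH1* bgd)
{
   const int n = sig->GetNbinsX();
   double bestCut = sig->GetBinLowEdge(1);
   double bestErr = sig->Integral(1, n) + bgd->Integral(1, n);
   for (int i = 1; i < n; ++i) {
      const double err = sig->Integral(1, i) + bgd->Integral(i + 1, n);
      if (err < bestErr) {
         bestErr = err;
         bestCut = sig->GetBinLowEdge(i + 1);
      }
   }
   return bestCut;
}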
TStopwatch timer
Definition: pirndm.C:37
void AddEvent(Double_t val, Double_t weight, Int_t type)
void ProcessOptions()
process user options
Double_t SingleBoost(MethodBase *method)
const Event * GetEvent() const
Definition: MethodBase.h:745
std::vector< Double_t > fMethodWeight
DataSet * Data() const
Definition: MethodBase.h:405
virtual ~MethodBoost(void)
destructor
TString fWeightFileDir
Definition: Config.h:100
UInt_t GetClass() const
Definition: Event.h:89
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:43
Int_t GetNNodes()
Definition: MethodDT.h:109
IONames & GetIONames()
Definition: Config.h:78
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
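In the same spirit (a sketch, not the Tools.cxx code), a histogram can be scaled so that its sum of weights equals norm:

#include "TH1.h"

void NormaliseHist(TH1* h, double norm = 1.0)
{
   const double sum = h->GetSumOfWeights();
   if (sum > 0.)
      h->Scale(norm / sum); // afterwards GetSumOfWeights() equals norm
}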
virtual void ParseOptions()
options parser
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:403
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
TFile * GetFile() const
Definition: MethodBase.h:366
Definition: PDF.h:71
Double_t GetWeight() const
return the event weight, depending on whether the flag IgnoreNegWeightsInTraining is set or not ...
Definition: Event.cxx:378
UInt_t GetNEvents() const
temporary event used when testing on a DataSet other than the method's own
Definition: MethodBase.h:413
virtual void Delete(Option_t *option="")
Delete this object.
Definition: TObject.cxx:229
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
registers the option string from which the boosted classifier will be created
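From the user side, boosting is typically requested through the Boost_* options in the string passed when booking a method with the TMVA Factory (the option values here are illustrative):

#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

// `factory` and `loader` are assumed to be configured elsewhere;
// Boost_Num=20 requests 20 boosting iterations of the Fisher classifier.
void BookBoostedFisher(TMVA::Factory* factory, TMVA::DataLoader* loader)
{
   factory->BookMethod(loader, TMVA::Types::kFisher, "BoostedFisher",
                       "H:!V:Boost_Num=20:Boost_Transform=log");
}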
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TTree.cxx:9042
TString fHistoricOption
Definition: MethodBoost.h:196
RooCmdArg Timer(Bool_t flag=kTRUE)
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
returns the Results object for the given results name, tree type and analysis type, creating it if necessary
Definition: DataSet.cxx:286
Service class for 2-Dim histogram classes.
Definition: TH2.h:36
SVector< double, 2 > v
Definition: Dict.h:5
const char * GetName() const
Definition: MethodBase.h:330
ClassInfo * GetClassInfo(Int_t clNum) const
class TMVA::Config::VariablePlotting fVariablePlotting
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
Double_t fBaggedSampleFraction
Definition: MethodBoost.h:160
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content; see the convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
Definition: TH1.cxx:8323
TString fTransformString
Definition: MethodBoost.h:155
unsigned int UInt_t
Definition: RtypesCore.h:42
TMarker * m
Definition: textangle.C:8
char * Form(const char *fmt,...)
DataSetManager * fDataSetManager
Ssiz_t Length() const
Definition: TString.h:390
void ScaleBoostWeight(Double_t s) const
Definition: Event.h:114
const TString & GetJobName() const
Definition: MethodBase.h:326
const TString & GetMethodName() const
Definition: MethodBase.h:327
TAxis * GetYaxis()
Definition: TH1.h:325
float xmax
Definition: THbookFile.cxx:93
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:618
virtual TDirectory * GetDirectory(const char *apath, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory named "apath".
Bool_t IsSilentFile()
Definition: MethodBase.h:375
REAL epsilon
Definition: triangle.c:617
void CreateMVAHistorgrams()
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:452
UInt_t GetNVariables() const
Definition: MethodBase.h:341
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:233
virtual Double_t GetSeparationGain(const Double_t &nSelS, const Double_t &nSelB, const Double_t &nTotS, const Double_t &nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
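For a concrete instance, take the Gini index as the separation criterion (a sketch consistent with the SeparationBase interface, not its exact code):

// Gini impurity for s signal and b background (weighted) events.
double Gini(double s, double b)
{
   const double n = s + b;
   if (n <= 0.) return 0.;
   const double p = s / n;
   return p * (1. - p);
}

// Gain of a split: parent impurity minus the weighted impurities of the
// two daughters (the "selected" and "not selected" subsamples).
double SeparationGain(double nSelS, double nSelB, double nTotS, double nTotB)
{
   const double nTot = nTotS + nTotB;
   const double nSel = nSelS + nSelB;
   return Gini(nTotS, nTotB)
        - (nSel / nTot) * Gini(nSelS, nSelB)
        - ((nTot - nSel) / nTot) * Gini(nTotS - nSelS, nTotB - nSelB);
}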
TString & Remove(Ssiz_t pos)
Definition: TString.h:616
int Ssiz_t
Definition: RtypesCore.h:63
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
TString fBoostedMethodOptions
Definition: MethodBoost.h:164
Double_t Exp(Double_t x)
Definition: TMath.h:495
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e. the dataset) ...
virtual void CheckSetup()
check; may be overridden by a derived class (sometimes, e.g., fitters are used which can only be implement...
Definition: MethodBase.cxx:430
Bool_t fMonitorBoostedMethod
Definition: MethodBoost.h:166
#define ClassImp(name)
Definition: Rtypes.h:279
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
Definition: MethodBase.h:399
void CheckSetup()
check; may be overridden by a derived class (sometimes, e.g., fitters are used which can only be implement...
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:44
std::vector< TH1 *> fTrainSigMVAHist
Definition: MethodBoost.h:169
TString fBoostedMethodTitle
Definition: MethodBoost.h:163
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:127
int type
Definition: TGX11.cxx:120
void SetBoostWeight(Double_t w) const
Definition: Event.h:113
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:114
The TH1 histogram class.
Definition: TH1.h:80
Double_t fMethodError
Definition: MethodBoost.h:182
void AddPreDefVal(const T &)
Definition: Configurable.h:174
void GetHelpMessage() const
Get help message text.
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here ...
UInt_t GetNumber() const
Definition: ClassInfo.h:73
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:108
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:85
void ProcessSetup()
process all options; the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:420
void ComputeStat(const std::vector< TMVA::Event *> &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
compute mean, RMS and value range of the signal and background distributions (starts with a sanity check)
Definition: Tools.cxx:215
const TString & GetOptions() const
Definition: Configurable.h:90
virtual void TestClassification()
initialization and evaluation of the classifier on the test sample
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1652
TString fBoostedMethodName
Definition: MethodBoost.h:162
#define REGISTER_METHOD(CLASS)
registers a method with the ClassifierFactory, for example REGISTER_METHOD(Boost)
std::vector< IMethod * > fMethods
Abstract ClassifierFactory template that handles arbitrary types.
Double_t fAdaBoostBeta
Definition: MethodBoost.h:158
Double_t GetMVAProbAt(Double_t value)
TH2 * GetHist2D(const TString &alias) const
Definition: Results.cxx:136
DataSetManager * fDataSetManager
Definition: MethodBoost.h:192
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
Definition: MethodBase.cxx:590
virtual Double_t GetSeparationIndex(const Double_t &s, const Double_t &b)=0
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
const Event * fTmpEvent
Definition: MethodBase.h:408
void SetWeightFileDir(TString fileDir)
set directory of weight file
Double_t GetOriginalWeight() const
Definition: Event.h:87
Bool_t fHistoricBoolOption
Definition: MethodBoost.h:197
void InitHistos()
initialisation routine
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:356
#define NULL
Definition: Rtypes.h:82
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:301
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:229
Bool_t IsSignal(const Event *ev) const
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:433
A TTree object has a header with a name and a title.
Definition: TTree.h:98
TTree * fMonitorTree
Definition: MethodBoost.h:180
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:83
virtual Int_t GetNbinsX() const
Definition: TH1.h:301
std::vector< TH1 *> fBTrainSigMVAHist
Definition: MethodBoost.h:172
static void EnableOutput()
Definition: MsgLogger.cxx:70
Double_t fBoostWeight
Definition: MethodBoost.h:181
const Bool_t kTRUE
Definition: Rtypes.h:91
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:155
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:308
std::vector< TH1 *> fBTrainBgdMVAHist
Definition: MethodBoost.h:173
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
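One simple combination scheme, shown for illustration only (TMVA's actual combination depends on the chosen Boost_Type), is a weighted majority vote of the individual classifiers using their boost weights:

#include <cstddef>
#include <vector>
#include "TMVA/MethodBase.h"

double BoostedVote(const std::vector<TMVA::MethodBase*>& methods,
                   const std::vector<double>& weights)
{
   double sum = 0., norm = 0.;
   for (std::size_t i = 0; i < methods.size(); ++i) {
      const double vote = methods[i]->IsSignalLike() ? +1. : -1.;
      sum  += weights[i] * vote;  // weighted +/-1 vote of classifier i
      norm += weights[i];
   }
   return norm > 0. ? sum / norm : 0.;
}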
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
Types::EMVA GetMethodType() const
Definition: MethodBase.h:329
virtual void TestClassification()
initialization and evaluation of the classifier on the test sample
const Event * GetEvent() const
Definition: DataSet.cxx:211
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:432
std::vector< TH1 *> fTestBgdMVAHist
Definition: MethodBoost.h:177
TAxis * GetXaxis()
Definition: TH1.h:324
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:819
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:360
const char * Data() const
Definition: TString.h:349
Bool_t IsModelPersistence()
Definition: MethodBase.h:379