Logo ROOT  
Reference Guide
MethodBoost.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss,Or Cohen, Jan Therhaag, Eckhard von Toerne
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodCompositeBase *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Virtual base class for all MVA method *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmazer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
19 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
20 * *
21 * Copyright (c) 2005-2011: *
22 * CERN, Switzerland *
23 * U. of Victoria, Canada *
24 * MPI-K Heidelberg, Germany *
25 * U. of Bonn, Germany *
26 * *
27 * Redistribution and use in source and binary forms, with or without *
28 * modification, are permitted according to the terms listed in LICENSE *
29 * (http://tmva.sourceforge.net/LICENSE) *
30 **********************************************************************************/
31
32/*! \class TMVA::MethodBoost
33\ingroup TMVA
34
35Class for boosting a TMVA method
36
37This class is meant to boost a single classifier. Boosting means
38training the classifier a few times. Every time the weights of the
39events are modified according to how well the classifier performed
40on the test sample.
41
42*/
43
44#include "TMVA/MethodBoost.h"
45
47#include "TMVA/Config.h"
48#include "TMVA/Configurable.h"
49#include "TMVA/DataSet.h"
50#include "TMVA/DataSetInfo.h"
51#include "TMVA/IMethod.h"
52#include "TMVA/MethodBase.h"
53#include "TMVA/MethodCategory.h"
55#include "TMVA/MethodDT.h"
56#include "TMVA/MethodFisher.h"
57#include "TMVA/PDF.h"
58#include "TMVA/Results.h"
59#include "TMVA/Timer.h"
60#include "TMVA/Tools.h"
61#include "TMVA/Types.h"
62
63#include "TMVA/SeparationBase.h"
65#include "TMVA/GiniIndex.h"
66#include "TMVA/CrossEntropy.h"
69
70#include "TRandom3.h"
71#include "TMath.h"
72#include "TH1F.h"
73#include "TH2F.h"
74#include "TSpline.h"
75#include "TDirectory.h"
76#include "TTree.h"
77
78#include <iostream>
79#include <algorithm>
80#include <vector>
81#include <cmath>
82
83
84REGISTER_METHOD(Boost)
85
87
88////////////////////////////////////////////////////////////////////////////////
89
91 const TString& methodTitle,
92 DataSetInfo& theData,
93 const TString& theOption ) :
94 TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption)
95 , fBoostNum(0)
96 , fDetailedMonitoring(kFALSE)
97 , fAdaBoostBeta(0)
98 , fRandomSeed(0)
99 , fBaggedSampleFraction(0)
100 , fBoostedMethodTitle(methodTitle)
101 , fBoostedMethodOptions(theOption)
102 , fMonitorBoostedMethod(kFALSE)
103 , fMonitorTree(0)
104 , fBoostWeight(0)
105 , fMethodError(0)
106 , fROC_training(0.0)
107 , fOverlap_integral(0.0)
108 , fMVAvalues(0)
109{
110 fMVAvalues = new std::vector<Float_t>;
111 fDataSetManager = NULL;
113}
114
115////////////////////////////////////////////////////////////////////////////////
116
118 const TString& theWeightFile)
119 : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
120 , fBoostNum(0)
121 , fDetailedMonitoring(kFALSE)
122 , fAdaBoostBeta(0)
123 , fRandomSeed(0)
124 , fBaggedSampleFraction(0)
125 , fBoostedMethodTitle("")
126 , fBoostedMethodOptions("")
127 , fMonitorBoostedMethod(kFALSE)
128 , fMonitorTree(0)
129 , fBoostWeight(0)
130 , fMethodError(0)
131 , fROC_training(0.0)
132 , fOverlap_integral(0.0)
133 , fMVAvalues(0)
134{
135 fMVAvalues = new std::vector<Float_t>;
136 fDataSetManager = NULL;
138}
139
140////////////////////////////////////////////////////////////////////////////////
141/// destructor
142
144{
145 fMethodWeight.clear();
146
147 // the histogram themselves are deleted when the file is closed
148
149 fTrainSigMVAHist.clear();
150 fTrainBgdMVAHist.clear();
151 fBTrainSigMVAHist.clear();
152 fBTrainBgdMVAHist.clear();
153 fTestSigMVAHist.clear();
154 fTestBgdMVAHist.clear();
155
156 if (fMVAvalues) {
157 delete fMVAvalues;
158 fMVAvalues = 0;
159 }
160}
161
162
163////////////////////////////////////////////////////////////////////////////////
164/// Boost can handle classification with 2 classes and regression with one regression-target
165
167{
168 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
169 // if (type == Types::kRegression && numberTargets == 1) return kTRUE;
170 return kFALSE;
171}
172
173
174////////////////////////////////////////////////////////////////////////////////
175
177{
178 DeclareOptionRef( fBoostNum = 1, "Boost_Num",
179 "Number of times the classifier is boosted" );
180
181 DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
182 "Write monitoring histograms for each boosted classifier" );
183
184 DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
185 "Produce histograms for detailed boost monitoring" );
186
187 DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
188 AddPreDefVal(TString("RealAdaBoost"));
189 AddPreDefVal(TString("AdaBoost"));
190 AddPreDefVal(TString("Bagging"));
191
192 DeclareOptionRef(fBaggedSampleFraction=.6,"Boost_BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );
193
194 DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
195 "The ADA boost parameter that sets the effect of every boost step on the events' weights" );
196
197 DeclareOptionRef( fTransformString = "step", "Boost_Transform",
198 "Type of transform applied to every boosted method linear, log, step" );
199 AddPreDefVal(TString("step"));
200 AddPreDefVal(TString("linear"));
201 AddPreDefVal(TString("log"));
202 AddPreDefVal(TString("gauss"));
203
204 DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
205 "Seed for random number generator used for bagging" );
206
207 TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum);
208}
209
210////////////////////////////////////////////////////////////////////////////////
211/// options that are used ONLY for the READER to ensure backward compatibility
212/// they are hence without any effect (the reader is only reading the training
213/// options that HAD been used at the training of the .xml weight file at hand
214
216{
217
219
220 DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
221 "How to set the final weight of the boosted classifiers" );
222 AddPreDefVal(TString("ByError"));
223 AddPreDefVal(TString("Average"));
224 AddPreDefVal(TString("ByROC"));
225 AddPreDefVal(TString("ByOverlap"));
226 AddPreDefVal(TString("LastMethod"));
227
228 DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
229 "Type of transform applied to every boosted method linear, log, step" );
230 AddPreDefVal(TString("step"));
231 AddPreDefVal(TString("linear"));
232 AddPreDefVal(TString("log"));
233 AddPreDefVal(TString("gauss"));
234
235 // this option here
236 //DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
237 // still exists, but these two possible values
238 AddPreDefVal(TString("HighEdgeGauss"));
239 AddPreDefVal(TString("HighEdgeCoPara"));
240 // have been deleted .. hope that works :)
241
242 DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
243 "Recalculate the classifier MVA Signallike cut at every boost iteration" );
244
245}
246
247////////////////////////////////////////////////////////////////////////////////
248/// just registering the string from which the boosted classifier will be created
249
251{
252 fBoostedMethodName = Types::Instance().GetMethodName( theMethod );
253 fBoostedMethodTitle = methodTitle;
254 fBoostedMethodOptions = theOption;
255 TString opts=theOption;
256 opts.ToLower();
257 // if (opts.Contains("vartransform")) Log() << kFATAL << "It is not possible to use boost in conjunction with variable transform. Please remove either Boost_Num or VarTransform from the option string"<< methodTitle<<Endl;
258
259 return kTRUE;
260}
261
262////////////////////////////////////////////////////////////////////////////////
263
265{
266}
267
268////////////////////////////////////////////////////////////////////////////////
269/// initialisation routine
270
272{
273
274 Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
275
276 results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
277 results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
278 results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
279 if (fDetailedMonitoring){
280 results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
281 results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
282 results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
283 results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
284 results->Store(new TH1F("OverlapIntegal_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
285 }
286
287
288 results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
289 results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
290 results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
291 results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
292 results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
293 results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
294 if (fDetailedMonitoring){
295 results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
296 results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
297 results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
298 results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
299 results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
300 results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
301 results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
302 results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
303 results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
304 results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
305 }
306
307 results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
308 results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
309 results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");
310
311 results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
312 results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
313 results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");
314
315
316
317 fMonitorTree= new TTree("MonitorBoost","Boost variables");
318 fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
319 fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
320 fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
321 fMonitorBoostedMethod = kTRUE;
322
323}
324
325
326////////////////////////////////////////////////////////////////////////////////
327
329{
330 Log() << kDEBUG << "CheckSetup: fBoostType="<<fBoostType << Endl;
331 Log() << kDEBUG << "CheckSetup: fAdaBoostBeta="<<fAdaBoostBeta<<Endl;
332 Log() << kDEBUG << "CheckSetup: fBoostWeight="<<fBoostWeight<<Endl;
333 Log() << kDEBUG << "CheckSetup: fMethodError="<<fMethodError<<Endl;
334 Log() << kDEBUG << "CheckSetup: fBoostNum="<<fBoostNum << Endl;
335 Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed<< Endl;
336 Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()="<<fTrainSigMVAHist.size()<<Endl;
337 Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()="<<fTestSigMVAHist.size()<<Endl;
338 Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod? "true" : "false") << Endl;
339 Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title="<< fBoostedMethodTitle<< Endl;
340 Log() << kDEBUG << "CheckSetup: MOptions="<< fBoostedMethodOptions << Endl;
341 Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree <<Endl;
342 Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" <<fCurrentMethodIdx << Endl;
343 if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]" <<fMethods[0]<<Endl;
344 Log() << kDEBUG << "CheckSetup: fMethodWeight.size()" << fMethodWeight.size() << Endl;
345 if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]="<<fMethodWeight[0]<<Endl;
346 Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
347
348}
349////////////////////////////////////////////////////////////////////////////////
350
352{
353 TDirectory* methodDir( 0 );
354 TString dirName,dirTitle;
355 Int_t StopCounter=0;
356 Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
357
358
359 InitHistos();
360
361 if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
362 Data()->SetCurrentType(Types::kTraining);
363
364 if (fMethods.size() > 0) fMethods.clear();
365 fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);
366
367 Log() << kINFO << "Training "<< fBoostNum << " " << fBoostedMethodName << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
368 Timer timer( fBoostNum, GetName() );
369
370 ResetBoostWeights();
371
372 // clean boosted method options
373 CleanBoostOptions();
374
375
376 // remove transformations for individual boosting steps
377 // the transformation of the main method will be rerouted to each of the boost steps
378 Ssiz_t varTrafoStart=fBoostedMethodOptions.Index("~VarTransform=");
379 if (varTrafoStart >0) {
380 Ssiz_t varTrafoEnd =fBoostedMethodOptions.Index(":",varTrafoStart);
381 if (varTrafoEnd<varTrafoStart)
382 varTrafoEnd=fBoostedMethodOptions.Length();
383 fBoostedMethodOptions.Remove(varTrafoStart,varTrafoEnd-varTrafoStart);
384 }
385
386 //
387 // training and boosting the classifiers
388 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
389 // the first classifier shows the option string output, the rest not
390 if (fCurrentMethodIdx>0) TMVA::MsgLogger::InhibitOutput();
391
393 fBoostedMethodName.Data(), GetJobName(), Form("%s_B%04i", fBoostedMethodTitle.Data(), fCurrentMethodIdx),
394 DataInfo(), fBoostedMethodOptions);
396
397 // suppressing the rest of the classifier output the right way
398 fCurrentMethod = (dynamic_cast<MethodBase*>(method));
399
400 if (fCurrentMethod==0) {
401 Log() << kFATAL << "uups.. guess the booking of the " << fCurrentMethodIdx << "-th classifier somehow failed" << Endl;
402 return; // hope that makes coverity happy (as if fears I might use the pointer later on, not knowing that FATAL exits
403 }
404
405 // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects) // DSMTEST
406 if (fCurrentMethod->GetMethodType() == Types::kCategory) { // DSMTEST
407 MethodCategory *methCat = (dynamic_cast<MethodCategory*>(fCurrentMethod)); // DSMTEST
408 if (!methCat) // DSMTEST
409 Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /MethodBoost" << Endl; // DSMTEST
410 methCat->fDataSetManager = fDataSetManager; // DSMTEST
411 } // DSMTEST
412
413 fCurrentMethod->SetMsgType(kWARNING);
414 fCurrentMethod->SetupMethod();
415 fCurrentMethod->ParseOptions();
416 // put SetAnalysisType here for the needs of MLP
417 fCurrentMethod->SetAnalysisType( GetAnalysisType() );
418 fCurrentMethod->ProcessSetup();
419 fCurrentMethod->CheckSetup();
420
421
422 // reroute transformationhandler
423 fCurrentMethod->RerouteTransformationHandler (&(this->GetTransformationHandler()));
424
425
426 // creating the directory of the classifier
427 if(!IsSilentFile())
428 {
429 if (fMonitorBoostedMethod) {
430 methodDir=GetFile()->GetDirectory(dirName=Form("%s_B%04i",fBoostedMethodName.Data(),fCurrentMethodIdx));
431 if (methodDir==0) {
432 methodDir=BaseDir()->mkdir(dirName,dirTitle=Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(),fCurrentMethodIdx));
433 }
434 fCurrentMethod->SetMethodDir(methodDir);
435 fCurrentMethod->BaseDir()->cd();
436 }
437 }
438
439 // training
440 TMVA::MethodCompositeBase::fMethods.push_back(method);
441 timer.DrawProgressBar( fCurrentMethodIdx );
442 if (fCurrentMethodIdx==0) MonitorBoost(Types::kBoostProcBegin,fCurrentMethodIdx);
443 MonitorBoost(Types::kBeforeTraining,fCurrentMethodIdx);
444 TMVA::MsgLogger::InhibitOutput(); //suppressing Logger outside the method
445 if (fBoostType=="Bagging") Bagging(); // you want also to train the first classifier on a bagged sample
446 SingleTrain();
448 if(!IsSilentFile())fCurrentMethod->WriteMonitoringHistosToFile();
449
450 // calculate MVA values of current method for all events in training sample
451 // (used later on to get 'misclassified events' etc for the boosting
452 CalcMVAValues();
453
454 if(!IsSilentFile()) if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
455
456 // get ROC integral and overlap integral for single method on
457 // training sample if fMethodWeightType == "ByROC" or the user
458 // wants detailed monitoring
459
460 // boosting (reweight training sample)
461 MonitorBoost(Types::kBeforeBoosting,fCurrentMethodIdx);
462 SingleBoost(fCurrentMethod);
463
464 MonitorBoost(Types::kAfterBoosting,fCurrentMethodIdx);
465 results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1,fBoostWeight);
466 results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1,fMethodError);
467
468 if (fDetailedMonitoring) {
469 fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining, kTRUE);
470 results->GetHist("ROCIntegral_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
471 results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
472 results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
473 results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
474 results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
475 }
476
477
478
479 fMonitorTree->Fill();
480
481 // stop boosting if needed when error has reached 0.5
482 // thought of counting a few steps, but it doesn't seem to be necessary
483 Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
484 if (fMethodError > 0.49999) StopCounter++;
485 if (StopCounter > 0 && fBoostType != "Bagging") {
486 timer.DrawProgressBar( fBoostNum );
487 fBoostNum = fCurrentMethodIdx+1;
488 Log() << kINFO << "Error rate has reached 0.5 ("<< fMethodError<<"), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
489 if (fBoostNum < 5)
490 Log() << kINFO << "The classifier might be too strong to boost with Beta = " << fAdaBoostBeta << ", try reducing it." <<Endl;
491 break;
492 }
493 }
494
495 //as MethodBoost acts not on a private event sample (like MethodBDT does), we need to remember not
496 // to leave "boosted" events to the next classifier in the factory
497
498 ResetBoostWeights();
499
500 Timer* timer1= new Timer( fBoostNum, GetName() );
501 // normalizing the weights of the classifiers
502 for (fCurrentMethodIdx=0;fCurrentMethodIdx<fBoostNum;fCurrentMethodIdx++) {
503 // performing post-boosting actions
504
505 timer1->DrawProgressBar( fCurrentMethodIdx );
506
507 if (fCurrentMethodIdx==fBoostNum) {
508 Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime()
509 << " " << Endl;
510 }
511
512 TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
513 if (tmp) tmp->SetBinContent(fCurrentMethodIdx+1,fMethodWeight[fCurrentMethodIdx]);
514
515 }
516
517 // Ensure that in case of only 1 boost the method weight equals
518 // 1.0. This avoids unexpected behaviour in case of very bad
519 // classifiers which have fBoostWeight=1 or fMethodError=0.5,
520 // because their weight would be set to zero. This behaviour is
521 // not ok if one boosts just one time.
522 if (fMethods.size()==1) fMethodWeight[0] = 1.0;
523
524 MonitorBoost(Types::kBoostProcEnd);
525
526 delete timer1;
527}
528
529////////////////////////////////////////////////////////////////////////////////
530
532{
533 fBoostedMethodOptions=GetOptions();
534}
535
536////////////////////////////////////////////////////////////////////////////////
537
539{
540 if (fBoostNum <=0) Log() << kFATAL << "CreateHistograms called before fBoostNum is initialized" << Endl;
541 // calculating histograms boundaries and creating histograms..
542 // nrms = number of rms around the average to use for outline (of the 0 classifier)
543 Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
544 Int_t signalClass = 0;
545 if (DataInfo().GetClassInfo("Signal") != 0) {
546 signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
547 }
548 gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues,
549 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
550
552 xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
553 xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;
554
555 // creating all the histograms
556 for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
557 fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S", fNbins, xmin, xmax ) );
558 fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B", fNbins, xmin, xmax ) );
559 fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
560 fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
561 fTestSigMVAHist .push_back( new TH1F( Form("MVA_Test_S%04i", imtd), "MVA_Test_S", fNbins, xmin, xmax ) );
562 fTestBgdMVAHist .push_back( new TH1F( Form("MVA_Test_B%04i", imtd), "MVA_Test_B", fNbins, xmin, xmax ) );
563 }
564}
565
566////////////////////////////////////////////////////////////////////////////////
567/// resetting back the boosted weights of the events to 1
568
570{
571 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
572 const Event *ev = Data()->GetEvent(ievt);
573 ev->SetBoostWeight( 1.0 );
574 }
575}
576
577////////////////////////////////////////////////////////////////////////////////
578
580{
581 TDirectory* dir=0;
582 if (fMonitorBoostedMethod) {
583 for (UInt_t imtd=0;imtd<fBoostNum;imtd++) {
584
585 //writing the histograms in the specific classifier's directory
586 MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
587 if (!m) continue;
588 dir = m->BaseDir();
589 dir->cd();
590 fTrainSigMVAHist[imtd]->SetDirectory(dir);
591 fTrainSigMVAHist[imtd]->Write();
592 fTrainBgdMVAHist[imtd]->SetDirectory(dir);
593 fTrainBgdMVAHist[imtd]->Write();
594 fBTrainSigMVAHist[imtd]->SetDirectory(dir);
595 fBTrainSigMVAHist[imtd]->Write();
596 fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
597 fBTrainBgdMVAHist[imtd]->Write();
598 }
599 }
600
601 // going back to the original folder
602 BaseDir()->cd();
603
604 fMonitorTree->Write();
605}
606
607////////////////////////////////////////////////////////////////////////////////
608
610{
612 if (fMonitorBoostedMethod) {
613 UInt_t nloop = fTestSigMVAHist.size();
614 if (fMethods.size()<nloop) nloop = fMethods.size();
615 //running over all the events and populating the test MVA histograms
616 Data()->SetCurrentType(Types::kTesting);
617 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
618 const Event* ev = GetEvent(ievt);
619 Float_t w = ev->GetWeight();
620 if (DataInfo().IsSignal(ev)) {
621 for (UInt_t imtd=0; imtd<nloop; imtd++) {
622 fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
623 }
624 }
625 else {
626 for (UInt_t imtd=0; imtd<nloop; imtd++) {
627 fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
628 }
629 }
630 }
631 Data()->SetCurrentType(Types::kTraining);
632 }
633}
634
635////////////////////////////////////////////////////////////////////////////////
636
638{
640 if (treetype==Types::kTraining) return;
641 UInt_t nloop = fTestSigMVAHist.size();
642 if (fMethods.size()<nloop) nloop = fMethods.size();
643 if (fMonitorBoostedMethod) {
644 TDirectory* dir=0;
645 for (UInt_t imtd=0;imtd<nloop;imtd++) {
646 //writing the histograms in the specific classifier's directory
647 MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
648 if (!mva) continue;
649 dir = mva->BaseDir();
650 if (dir==0) continue;
651 dir->cd();
652 fTestSigMVAHist[imtd]->SetDirectory(dir);
653 fTestSigMVAHist[imtd]->Write();
654 fTestBgdMVAHist[imtd]->SetDirectory(dir);
655 fTestBgdMVAHist[imtd]->Write();
656 }
657 }
658}
659
660////////////////////////////////////////////////////////////////////////////////
661/// process user options
662
664{
665}
666
667////////////////////////////////////////////////////////////////////////////////
668/// initialization
669
671{
672 Data()->SetCurrentType(Types::kTraining);
673 MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
674 if (meth){
675 meth->SetSilentFile(IsSilentFile());
676 if(IsModelPersistence()){
677 TString _fFileDir= DataInfo().GetName();
678 _fFileDir+="/"+gConfig().GetIONames().fWeightFileDir;
679 meth->SetWeightFileDir(_fFileDir);
680 }
681 meth->SetModelPersistence(IsModelPersistence());
682 meth->TrainMethod();
683 }
684}
685
686////////////////////////////////////////////////////////////////////////////////
687/// find the CUT on the individual MVA that defines an event as
688/// correct or misclassified (to be used in the boosting process)
689
691{
692 if (!method || method->GetMethodType() == Types::kDT ){ return;}
693
694 // creating a fine histograms containing the error rate
695 const Int_t nBins=10001;
696 Double_t minMVA=150000;
697 Double_t maxMVA=-150000;
698 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
699 GetEvent(ievt);
700 Double_t val=method->GetMvaValue();
701 //Helge .. I think one could very well use fMVAValues for that ... -->to do
702 if (val>maxMVA) maxMVA=val;
703 if (val<minMVA) minMVA=val;
704 }
705 maxMVA = maxMVA+(maxMVA-minMVA)/nBins;
706
707 TH1D *mvaS = new TH1D(Form("MVAS_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
708 TH1D *mvaB = new TH1D(Form("MVAB_%d",fCurrentMethodIdx) ,"",nBins,minMVA,maxMVA);
709 TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
710 TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
711
712
713 Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
714 if (fDetailedMonitoring){
715 results->Store(mvaS, Form("MVAS_%d",fCurrentMethodIdx));
716 results->Store(mvaB, Form("MVAB_%d",fCurrentMethodIdx));
717 results->Store(mvaSC,Form("MVASC_%d",fCurrentMethodIdx));
718 results->Store(mvaBC,Form("MVABC_%d",fCurrentMethodIdx));
719 }
720
721 for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
722
723 Double_t weight = GetEvent(ievt)->GetWeight();
724 Double_t mvaVal=method->GetMvaValue();
725 if (DataInfo().IsSignal(GetEvent(ievt))){
726 mvaS->Fill(mvaVal,weight);
727 }else {
728 mvaB->Fill(mvaVal,weight);
729 }
730 }
731 SeparationBase *sepGain;
732
733
734 // Boosting should use Misclassification not Gini Index (changed, Helge 31.5.2013)
735 // WARNING! It works with Misclassification only if you fix the signal to
736 // background at every step. Strangely enough, there are better results
737 // ( as seen with BDT ) if you use Gini Index, and accept that sometimes no
738 // sensible cut is found - i.e. the cut is then outside the MVA value range,
739 // all events are classified as background and then according to the Boost
740 // algorithm something is renormed 'automatically' ... so that in the next
741 // step again the result is something sensible.
742 // Strange ... that THIS is supposed to be right?
743
744 // SeparationBase *sepGain2 = new MisClassificationError();
745 //sepGain = new MisClassificationError();
746 sepGain = new GiniIndex();
747 //sepGain = new CrossEntropy();
748
749 Double_t sTot = mvaS->GetSum();
750 Double_t bTot = mvaB->GetSum();
751
752 mvaSC->SetBinContent(1,mvaS->GetBinContent(1));
753 mvaBC->SetBinContent(1,mvaB->GetBinContent(1));
754 Double_t sSel=0;
755 Double_t bSel=0;
756 Double_t separationGain=sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
757 Double_t mvaCut=mvaSC->GetBinLowEdge(1);
758 Double_t sSelCut=sSel;
759 Double_t bSelCut=bSel;
760 // std::cout << "minMVA =" << minMVA << " maxMVA = " << maxMVA << " width = " << mvaSC->GetBinWidth(1) << std::endl;
761
762 // for (Int_t ibin=1;ibin<=nBins;ibin++) std::cout << " cutvalues[" << ibin<<"]="<<mvaSC->GetBinLowEdge(ibin) << " " << mvaSC->GetBinCenter(ibin) << std::endl;
763 Double_t mvaCutOrientation=1; // 1 if mva > mvaCut --> Signal and -1 if mva < mvaCut (i.e. mva*-1 > mvaCut*-1) --> Signal
764 for (Int_t ibin=1;ibin<=nBins;ibin++){
765 mvaSC->SetBinContent(ibin,mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
766 mvaBC->SetBinContent(ibin,mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));
767
768 sSel=mvaSC->GetBinContent(ibin);
769 bSel=mvaBC->GetBinContent(ibin);
770
771 // if (ibin==nBins){
772 // std::cout << "Last bin s="<< sSel <<" b="<<bSel << " s="<< sTot-sSel <<" b="<<bTot-bSel << endl;
773 // }
774
775 if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
776 // && (mvaSC->GetBinCenter(ibin) >0 || (fCurrentMethodIdx+1)%2 )
777 ){
778 separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
779 // mvaCut=mvaSC->GetBinCenter(ibin);
780 mvaCut=mvaSC->GetBinLowEdge(ibin+1);
781 // if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) mvaCutOrientation=-1;
782 if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation=-1;
783 else mvaCutOrientation=1;
784 sSelCut=sSel;
785 bSelCut=bSel;
786 // std::cout << "new cut at " << mvaCut << "with s="<<sTot-sSel << " b="<<bTot-bSel << std::endl;
787 }
788 /*
789 Double_t ori;
790 if (sSel/bSel > (sTot-sSel)/(bTot-bSel)) ori=-1;
791 else ori=1;
792 std::cout << ibin << " mvacut="<<mvaCut
793 << " sTot=" << sTot
794 << " bTot=" << bTot
795 << " sSel=" << sSel
796 << " bSel=" << bSel
797 << " s/b(1)=" << sSel/bSel
798 << " s/b(2)=" << (sTot-sSel)/(bTot-bSel)
799 << " sepGain="<<sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)
800 << " sepGain2="<<sepGain2->GetSeparationGain(sSel,bSel,sTot,bTot)
801 << " " <<ori
802 << std::endl;
803 */
804
805 }
806
807 if (0){
808 double parentIndex=sepGain->GetSeparationIndex(sTot,bTot);
809 double leftIndex =sepGain->GetSeparationIndex(sSelCut,bSelCut);
810 double rightIndex =sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
811 std::cout
812 << " sTot=" << sTot
813 << " bTot=" << bTot
814 << " s="<<sSelCut
815 << " b="<<bSelCut
816 << " s2="<<(sTot-sSelCut)
817 << " b2="<<(bTot-bSelCut)
818 << " s/b(1)=" << sSelCut/bSelCut
819 << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
820 << " index before cut=" << parentIndex
821 << " after: left=" << leftIndex
822 << " after: right=" << rightIndex
823 << " sepGain=" << parentIndex-( (sSelCut+bSelCut) * leftIndex + (sTot-sSelCut+bTot-bSelCut) * rightIndex )/(sTot+bTot)
824 << " sepGain="<<separationGain
825 << " sepGain="<<sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
826 << " cut=" << mvaCut
827 << " idx="<<fCurrentMethodIdx
828 << " cutOrientation="<<mvaCutOrientation
829 << std::endl;
830 }
831 method->SetSignalReferenceCut(mvaCut);
832 method->SetSignalReferenceCutOrientation(mvaCutOrientation);
833
834 results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1,separationGain);
835
836
837 Log() << kDEBUG << "(old step) Setting method cut to " <<method->GetSignalReferenceCut()<< Endl;
838
839 if(IsSilentFile())
840 {
841 mvaS ->Delete();
842 mvaB ->Delete();
843 mvaSC->Delete();
844 mvaBC->Delete();
845 }
846}
847
848////////////////////////////////////////////////////////////////////////////////
849
851{
852 Double_t returnVal=-1;
853
854
855 if (fBoostType=="AdaBoost") returnVal = this->AdaBoost (method,1);
856 else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost (method,0);
857 else if (fBoostType=="Bagging") returnVal = this->Bagging ();
858 else{
859 Log() << kFATAL << "<Boost> unknown boost option " << fBoostType<< " called" << Endl;
860 }
861 fMethodWeight.push_back(returnVal);
862 return returnVal;
863}
864////////////////////////////////////////////////////////////////////////////////
865/// the standard (discrete or real) AdaBoost algorithm
866
868{
869 if (!method) {
870 Log() << kWARNING << " AdaBoost called without classifier reference - needed for calculating AdaBoost " << Endl;
871 return 0;
872 }
873
874 Float_t w,v; Bool_t sig=kTRUE;
875 Double_t sumAll=0, sumWrong=0;
876 Bool_t* WrongDetection=new Bool_t[GetNEvents()];
877 QuickMVAProbEstimator *MVAProb=NULL;
878
879 if (discreteAdaBoost) {
880 FindMVACut(method);
881 Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
882 } else {
883 MVAProb=new TMVA::QuickMVAProbEstimator();
884 // the RealAdaBoost does use a simple "yes (signal)" or "no (background)"
885 // answer from your single MVA, but a "signal probability" instead (in the BDT case,
886 // that would be the 'purity' in the leaf node. For some MLP parameter, the MVA output
887 // can also interpreted as a probability, but here I try a general approach to get this
888 // probability from the MVA distributions...
889
890 for (Long64_t evt=0; evt<GetNEvents(); evt++) {
891 const Event* ev = Data()->GetEvent(evt);
892 MVAProb->AddEvent(fMVAvalues->at(evt),ev->GetWeight(),ev->GetClass());
893 }
894 }
895
896
897 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;
898
899 // finding the wrong events and calculating their total weights
900 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
901 const Event* ev = GetEvent(ievt);
902 sig=DataInfo().IsSignal(ev);
903 v = fMVAvalues->at(ievt);
904 w = ev->GetWeight();
905 sumAll += w;
906 if(!IsSilentFile())
907 {
908 if (fMonitorBoostedMethod) {
909 if (sig) {
910 fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
911 fTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
912 }
913 else {
914 fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
915 fTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
916 }
917 }
918 }
919
920 if (discreteAdaBoost){
921 if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
922 WrongDetection[ievt]=kFALSE;
923 }else{
924 WrongDetection[ievt]=kTRUE;
925 sumWrong+=w;
926 }
927 }else{
928 Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
929 mvaProb = 2*(mvaProb-0.5);
930 Int_t trueType;
931 if (DataInfo().IsSignal(ev)) trueType = 1;
932 else trueType = -1;
933 sumWrong+= w*trueType*mvaProb;
934 }
935 }
936
937 fMethodError=sumWrong/sumAll;
938
939 // calculating the fMethodError and the boostWeight out of it uses the formula
940 // w = ((1-err)/err)^beta
941
942 Double_t boostWeight=0;
943
944 if (fMethodError == 0) { //no misclassification made.. perfect, no boost ;)
945 Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
946 }else{
947
948 if (discreteAdaBoost)
949 boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
950 else
951 boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;
952
953
954 // std::cout << "boostweight = " << boostWeight << std::endl;
955
956 // ADA boosting, rescaling the weight of the wrong events according to the error level
957 // over the entire test sample rescaling all the weights to have the same sum, but without
958 // touching the original weights (changing only the boosted weight of all the events)
959 // first reweight
960
961 Double_t newSum=0., oldSum=0.;
962
963
964 Double_t boostfactor = TMath::Exp(boostWeight);
965
966
967 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
968 const Event* ev = Data()->GetEvent(ievt);
969 oldSum += ev->GetWeight();
970 if (discreteAdaBoost){
971 // events are classified as Signal OR background .. right or wrong
972 if (WrongDetection[ievt] && boostWeight != 0) {
973 if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
974 else ev->ScaleBoostWeight(1./boostfactor);
975 }
976 // if (ievt<30) std::cout<<ievt<<" var0="<<ev->GetValue(0)<<" var1="<<ev->GetValue(1)<<" weight="<<ev->GetWeight() << " boostby:"<<boostfactor<<std::endl;
977
978 }else{
979 // events are classified by their probability of being signal or background
980 // (eventually you should write this one - i.e. re-use the MVA value that were already
981 // calculated and stored.. however ,for the moment ..
982 Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
983 mvaProb = 2*(mvaProb-0.5);
984 // mvaProb = (1-mvaProb);
985
986 Int_t trueType=1;
987 if (DataInfo().IsSignal(ev)) trueType = 1;
988 else trueType = -1;
989
990 boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
991 if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
992 else ev->ScaleBoostWeight(1./boostfactor);
993
994 }
995 newSum += ev->GetWeight();
996 }
997
998 Double_t normWeight = oldSum/newSum;
999 // next normalize the weights
1000 Double_t normSig=0, normBkg=0;
1001 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1002 const Event* ev = Data()->GetEvent(ievt);
1003 ev->ScaleBoostWeight(normWeight);
1004 if (ev->GetClass()) normSig+=ev->GetWeight();
1005 else normBkg+=ev->GetWeight();
1006 }
1007
1008 Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
1009 results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);
1010
1011 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1012 const Event* ev = Data()->GetEvent(ievt);
1013
1014 if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
1015 else ev->ScaleBoostWeight(oldSum/normBkg/2);
1016 }
1017 }
1018
1019 delete[] WrongDetection;
1020 if (MVAProb) delete MVAProb;
1021
1022 fBoostWeight = boostWeight; // used ONLY for the monitoring tree
1023
1024 return boostWeight;
1025}
1026
1027
1028////////////////////////////////////////////////////////////////////////////////
1029/// Bagging or Bootstrap boosting, gives new random poisson weight for every event
1030
1032{
1033 TRandom3 *trandom = new TRandom3(fRandomSeed+fMethods.size());
1034 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1035 const Event* ev = Data()->GetEvent(ievt);
1036 ev->SetBoostWeight(trandom->PoissonD(fBaggedSampleFraction));
1037 }
1038 fBoostWeight = 1; // used ONLY for the monitoring tree
1039 return 1.;
1040}
1041
1042
1043////////////////////////////////////////////////////////////////////////////////
1044/// Get help message text
1045///
1046/// typical length of text line:
1047/// "|--------------------------------------------------------------|"
1048
1050{
1051 Log() << Endl;
1052 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1053 Log() << Endl;
1054 Log() << "This method combines several classifier of one species in a "<<Endl;
1055 Log() << "single multivariate quantity via the boost algorithm." << Endl;
1056 Log() << "the output is a weighted sum over all individual classifiers" <<Endl;
1057 Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
1058 Log() << "events that were misclassified in the previous tree a larger " << Endl;
1059 Log() << "weight in the training of the following classifier."<<Endl;
1060 Log() << "Optionally, Bagged boosting can also be applied." << Endl;
1061 Log() << Endl;
1062 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1063 Log() << Endl;
1064 Log() << "The most important parameter in the configuration is the "<<Endl;
1065 Log() << "number of boosts applied (Boost_Num) and the choice of boosting"<<Endl;
1066 Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
1067 Log() << "AdaBoosting: The most important parameters in this configuration" <<Endl;
1068 Log() << "is the beta parameter (Boost_AdaBoostBeta) " << Endl;
1069 Log() << "When boosting a linear classifier, it is sometimes advantageous"<<Endl;
1070 Log() << "to transform the MVA output non-linearly. The following options" <<Endl;
1071 Log() << "are available: step, log, and minmax, the default is no transform."<<Endl;
1072 Log() <<Endl;
1073 Log() << "Some classifiers are hard to boost and do not improve much in"<<Endl;
1074 Log() << "their performance by boosting them, some even slightly deteriorate"<< Endl;
1075 Log() << "due to the boosting." <<Endl;
1076 Log() << "The booking of the boost method is special since it requires"<<Endl;
1077 Log() << "the booing of the method to be boosted and the boost itself."<<Endl;
1078 Log() << "This is solved by booking the method to be boosted and to add"<<Endl;
1079 Log() << "all Boost parameters, which all begin with \"Boost_\" to the"<<Endl;
1080 Log() << "options string. The factory separates the options and initiates"<<Endl;
1081 Log() << "the boost process. The TMVA macro directory contains the example"<<Endl;
1082 Log() << "macro \"Boost.C\"" <<Endl;
1083}
1084
1085////////////////////////////////////////////////////////////////////////////////
1086
1088{
1089 return 0;
1090}
1091
1092////////////////////////////////////////////////////////////////////////////////
1093/// return boosted MVA response
1094
1096{
1097 Double_t mvaValue = 0;
1098 Double_t norm = 0;
1100 //Double_t fact = TMath::Exp(-1.)+TMath::Exp(1.);
1101 for (UInt_t i=0;i< fMethods.size(); i++){
1102 MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
1103 if (m==0) continue;
1104 Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
1105 Double_t sigcut = m->GetSignalReferenceCut();
1106
1107 // default is no transform
1108 if (fTransformString == "linear"){
1109
1110 }
1111 else if (fTransformString == "log"){
1112 if (val < sigcut) val = sigcut;
1113
1114 val = TMath::Log((val-sigcut)+epsilon);
1115 }
1116 else if (fTransformString == "step" ){
1117 if (m->IsSignalLike(val)) val = 1.;
1118 else val = -1.;
1119 }
1120 else if (fTransformString == "gauss"){
1121 val = TMath::Gaus((val-sigcut),1);
1122 }
1123 else {
1124 Log() << kFATAL << "error unknown transformation " << fTransformString<<Endl;
1125 }
1126 mvaValue+=val*fMethodWeight[i];
1127 norm +=fMethodWeight[i];
1128 // std::cout << "mva("<<i<<") = "<<val<<" " << valx<< " " << mvaValue<<" and sigcut="<<sigcut << std::endl;
1129 }
1130 mvaValue/=norm;
1131 // cannot determine error
1132 NoErrorCalc(err, errUpper);
1133
1134 return mvaValue;
1135}
1136
1137////////////////////////////////////////////////////////////////////////////////
1138/// Calculate the ROC integral of a single classifier or even the
1139/// whole boosted classifier. The tree type (training or testing
1140/// sample) is specified by 'eTT'.
1141///
1142/// If tree type kTraining is set, the original training sample is
1143/// used to compute the ROC integral (original weights).
1144///
1145/// - singleMethod - if kTRUE, return ROC integral of single (last
1146/// trained) classifier; if kFALSE, return ROC
1147/// integral of full classifier
1148///
1149/// - eTT - tree type (Types::kTraining / Types::kTesting)
1150///
1151/// - CalcOverlapIntergral - if kTRUE, the overlap integral of the
1152/// signal/background MVA distributions
1153/// is calculated and stored in
1154/// 'fOverlap_integral'
1155
1157{
1158 // set data sample training / testing
1159 Data()->SetCurrentType(eTT);
1160
1161 MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0; // ToDo CoVerity flags this line as there is no protection against a zero-pointer delivered by dynamic_cast
1162 // to make CoVerity happy (although, OF COURSE, the last method in the committee
1163 // has to be also of type MethodBase as ANY method is... hence the dynamic_cast
1164 // will never by "zero" ...
1165 if (singleMethod && !method) {
1166 Log() << kFATAL << " What do you do? Your method:"
1167 << fMethods.back()->GetName()
1168 << " seems not to be a propper TMVA method"
1169 << Endl;
1170 std::exit(1);
1171 }
1172 Double_t err = 0.0;
1173
1174 // temporary renormalize the method weights in case of evaluation
1175 // of full classifier.
1176 // save the old normalization of the methods
1177 std::vector<Double_t> OldMethodWeight(fMethodWeight);
1178 if (!singleMethod) {
1179 // calculate sum of weights of all methods
1180 Double_t AllMethodsWeight = 0;
1181 for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1182 AllMethodsWeight += fMethodWeight.at(i);
1183 // normalize the weights of the classifiers
1184 if (AllMethodsWeight != 0.0) {
1185 for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
1186 fMethodWeight[i] /= AllMethodsWeight;
1187 }
1188 }
1189
1190 // calculate MVA values
1191 Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
1192 std::vector <Float_t>* mvaRes;
1193 if (singleMethod && eTT==Types::kTraining)
1194 mvaRes = fMVAvalues; // values already calculated
1195 else {
1196 mvaRes = new std::vector <Float_t>(GetNEvents());
1197 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1198 GetEvent(ievt);
1199 (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
1200 }
1201 }
1202
1203 // restore the method weights
1204 if (!singleMethod)
1205 fMethodWeight = OldMethodWeight;
1206
1207 // now create histograms for calculation of the ROC integral
1208 Int_t signalClass = 0;
1209 if (DataInfo().GetClassInfo("Signal") != 0) {
1210 signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1211 }
1212 gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
1213 meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
1214
1216 xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
1217 xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;
1218
1219 // calculate ROC integral
1220 TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
1221 TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
1222 TH1 *mva_s_overlap=0, *mva_b_overlap=0;
1223 if (CalcOverlapIntergral) {
1224 mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
1225 mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
1226 }
1227 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1228 const Event* ev = GetEvent(ievt);
1229 Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
1230 if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
1231 else mva_b->Fill( (*mvaRes)[ievt], w );
1232
1233 if (CalcOverlapIntergral) {
1234 Float_t w_ov = ev->GetWeight();
1235 if (DataInfo().IsSignal(ev))
1236 mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
1237 else
1238 mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
1239 }
1240 }
1241 gTools().NormHist( mva_s );
1242 gTools().NormHist( mva_b );
1243 PDF *fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
1244 PDF *fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );
1245
1246 // calculate ROC integral from fS, fB
1248
1249 // calculate overlap integral
1250 if (CalcOverlapIntergral) {
1251 gTools().NormHist( mva_s_overlap );
1252 gTools().NormHist( mva_b_overlap );
1253
1254 fOverlap_integral = 0.0;
1255 for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
1256 Double_t bc_s = mva_s_overlap->GetBinContent(bin);
1257 Double_t bc_b = mva_b_overlap->GetBinContent(bin);
1258 if (bc_s > 0.0 && bc_b > 0.0)
1259 fOverlap_integral += TMath::Min(bc_s, bc_b);
1260 }
1261
1262 delete mva_s_overlap;
1263 delete mva_b_overlap;
1264 }
1265
1266 delete mva_s;
1267 delete mva_b;
1268 delete fS;
1269 delete fB;
1270 if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;
1271
1272 Data()->SetCurrentType(Types::kTraining);
1273
1274 return ROC;
1275}
1276
1278{
1279 // Calculate MVA values of current method fMethods.back() on
1280 // training sample
1281
1282 Data()->SetCurrentType(Types::kTraining);
1283 MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
1284 if (!method) {
1285 Log() << kFATAL << "dynamic cast to MethodBase* failed" <<Endl;
1286 return;
1287 }
1288 // calculate MVA values
1289 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1290 GetEvent(ievt);
1291 fMVAvalues->at(ievt) = method->GetMvaValue();
1292 }
1293
1294 // fill cumulative mva distribution
1295
1296
1297}
1298
1299////////////////////////////////////////////////////////////////////////////////
1300/// fill various monitoring histograms from information of the individual classifiers that
1301/// have been boosted.
1302/// of course.... this depends very much on the individual classifiers, and so far, only for
1303/// Decision Trees, this monitoring is actually implemented
1304
1306{
1307 Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
1308
1309 if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
1310 TMVA::MethodDT* currentDT=dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
1311 if (currentDT){
1312 if (stage == Types::kBoostProcBegin){
1313 results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
1314 results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
1315 }
1316
1317 if (stage == Types::kBeforeTraining){
1318 }
1319 else if (stage == Types::kBeforeBoosting){
1320 results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
1321 results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
1322 }
1323 else if (stage == Types::kAfterBoosting){
1324
1325 }
1326 else if (stage != Types::kBoostProcEnd){
1327 Log() << kINFO << "<Train> average number of nodes before/after pruning : "
1328 << results->GetHist("NodesBeforePruning")->GetMean() << " / "
1329 << results->GetHist("NodesAfterPruning")->GetMean()
1330 << Endl;
1331 }
1332 }
1333
1334 }else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
1335 if (stage == Types::kAfterBoosting){
1337 }
1338 }else{
1339 if (methodIndex < 3){
1340 Log() << kDEBUG << "No detailed boost monitoring for "
1341 << GetCurrentMethod(methodIndex)->GetMethodName()
1342 << " yet available " << Endl;
1343 }
1344 }
1345
1346 //boosting plots universal for all classifiers 'typically for debug purposes only as they are not general enough'
1347
1348 if (stage == Types::kBeforeBoosting){
1349 // if you want to display the weighted events for 2D case at each boost step:
1350 if (fDetailedMonitoring){
1351 // the following code is useful only for 2D examples - mainly illustration for debug/educational purposes:
1352 if (DataInfo().GetNVariables() == 2) {
1353 results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
1354 results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
1355 results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
1356 results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);
1357
1358 Data()->SetCurrentType(Types::kTraining);
1359 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1360 const Event* ev = GetEvent(ievt);
1361 Float_t w = ev->GetWeight();
1362 Float_t v0= ev->GetValue(0);
1363 Float_t v1= ev->GetValue(1);
1364 // if (ievt<3) std::cout<<ievt<<" var0="<<v0<<" var1="<<v1<<" weight="<<w<<std::endl;
1365 TH2* h;
1366 if (DataInfo().IsSignal(ev)) h=results->GetHist2D(Form("EventDistSig_%d",methodIndex));
1367 else h=results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
1368 if (h) h->Fill(v0,v1,w);
1369 }
1370 }
1371 }
1372 }
1373
1374 return;
1375}
1376
1377
#define REGISTER_METHOD(CLASS)
for example
#define h(i)
Definition: RSha256.hxx:106
const Bool_t kFALSE
Definition: RtypesCore.h:101
float Float_t
Definition: RtypesCore.h:57
long long Long64_t
Definition: RtypesCore.h:80
const Bool_t kTRUE
Definition: RtypesCore.h:100
#define ClassImp(name)
Definition: Rtypes.h:375
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
float xmin
Definition: THbookFile.cxx:95
float xmax
Definition: THbookFile.cxx:95
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition: TString.cxx:2452
Stat_t GetSum() const
Definition: TArrayD.h:46
virtual void SetMarkerColor(Color_t mcolor=1)
Set the marker color.
Definition: TAttMarker.h:38
Describe directory structure in memory.
Definition: TDirectory.h:45
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
Definition: TDirectory.cxx:407
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
virtual Bool_t cd(const char *path=nullptr)
Change current directory to "this" directory.
Definition: TDirectory.cxx:504
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:617
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:574
1-D histogram with an int per channel (see TH1 documentation)}
Definition: TH1.h:533
TH1 is the base class of all histogram classes in ROOT.
Definition: TH1.h:58
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:7446
TAxis * GetXaxis()
Definition: TH1.h:319
virtual Int_t GetNbinsX() const
Definition: TH1.h:295
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3338
TAxis * GetYaxis()
Definition: TH1.h:320
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:9089
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:9019
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:5025
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:257
Service class for 2-D histogram classes.
Definition: TH2.h:30
IMethod * Create(const std::string &name, const TString &job, const TString &title, DataSetInfo &dsi, const TString &option)
creates the method if needed based on the method name using the creator function the factory has stor...
static ClassifierFactory & Instance()
access to the ClassifierFactory singleton creates the instance if needed
TString fWeightFileDir
Definition: Config.h:124
class TMVA::Config::VariablePlotting fVariablePlotting
IONames & GetIONames()
Definition: Config.h:98
Class that contains all the data information.
Definition: DataSetInfo.h:62
void ScaleBoostWeight(Double_t s) const
Definition: Event.h:112
void SetBoostWeight(Double_t w) const
Definition: Event.h:111
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:236
Double_t GetOriginalWeight() const
Definition: Event.h:84
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:389
UInt_t GetClass() const
Definition: Event.h:86
Implementation of the GiniIndex as separation criterion.
Definition: GiniIndex.h:63
Interface for all concrete MVA method implementations.
Definition: IMethod.h:53
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:378
void SetWeightFileDir(TString fileDir)
set directory of weight file
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:596
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:854
virtual void TestClassification()
initialization
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
Types::EMVA GetMethodType() const
Definition: MethodBase.h:333
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:364
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:365
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:382
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:360
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
friend class MethodBoost
Definition: MethodBase.h:116
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
void MonitorBoost(Types::EBoostStage stage, UInt_t methodIdx=0)
fill various monitoring histograms from information of the individual classifiers that have been boos...
void ResetBoostWeights()
resetting back the boosted weights of the events to 1
void SingleTrain()
initialization
DataSetManager * fDataSetManager
DSMTEST.
Definition: MethodBoost.h:193
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
void CreateMVAHistorgrams()
Bool_t fHistoricBoolOption
historic variable, only needed for "CompatibilityOptions"
Definition: MethodBoost.h:195
void WriteMonitoringHistosToFile(void) const
write special monitoring histograms to file dummy implementation here --------------—
Double_t AdaBoost(MethodBase *method, Bool_t useYesNoLeaf)
the standard (discrete or real) AdaBoost algorithm
Bool_t BookMethod(Types::EMVA theMethod, TString methodTitle, TString theOption)
just registering the string from which the boosted classifier will be created
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
virtual void TestClassification()
initialization
void InitHistos()
initialisation routine
void ProcessOptions()
process user options
Double_t GetBoostROCIntegral(Bool_t, Types::ETreeType, Bool_t CalcOverlapIntergral=kFALSE)
Calculate the ROC integral of a single classifier or even the whole boosted classifier.
Double_t SingleBoost(MethodBase *method)
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
return boosted MVA response
Double_t Bagging()
Bagging or Bootstrap boosting, gives new random poisson weight for every event.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
Boost can handle classification with 2 classes and regression with one regression-target.
const Ranking * CreateRanking()
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
std::vector< Float_t > * fMVAvalues
mva values for the last trained method
Definition: MethodBoost.h:191
virtual ~MethodBoost(void)
destructor
void FindMVACut(MethodBase *method)
find the CUT on the individual MVA that defines an event as correct or misclassified (to be used in t...
void GetHelpMessage() const
Get help message text.
Class for categorizing the phase space.
DataSetManager * fDataSetManager
Virtual base class for combining several TMVA method.
std::vector< IMethod * > fMethods
vector of all classifiers
Analysis of Boosted Decision Trees.
Definition: MethodDT.h:49
Int_t GetNNodes()
Definition: MethodDT.h:97
Int_t GetNNodesBeforePruning()
Definition: MethodDT.h:96
static void InhibitOutput()
Definition: MsgLogger.cxx:67
static void EnableOutput()
Definition: MsgLogger.cxx:68
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
@ kSpline2
Definition: PDF.h:70
Double_t GetMVAProbAt(Double_t value)
void AddEvent(Double_t val, Double_t weight, Int_t type)
Ranking for variables in method (implementation)
Definition: Ranking.h:48
Class that is the base-class for a vector of result.
Definition: Results.h:57
TH2 * GetHist2D(const TString &alias) const
Definition: Results.cxx:145
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:136
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:86
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
virtual Double_t GetSeparationGain(const Double_t nSelS, const Double_t nSelB, const Double_t nTotS, const Double_t nTotB)
Separation Gain: the measure of how the quality of separation of the sample increases by splitting th...
virtual Double_t GetSeparationIndex(const Double_t s, const Double_t b)=0
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:146
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:202
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:202
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:828
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:383
Singleton class for Global types used by TMVA.
Definition: Types.h:71
@ kBoostProcBegin
Definition: Types.h:151
@ kBeforeBoosting
Definition: Types.h:153
@ kAfterBoosting
Definition: Types.h:154
@ kBoostProcEnd
Definition: Types.h:155
@ kBeforeTraining
Definition: Types.h:152
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:136
static Types & Instance()
the the single instance of "Types" if existing already, or create it (Singleton)
Definition: Types.cxx:70
@ kFisher
Definition: Types.h:82
@ kCategory
Definition: Types.h:97
EAnalysisType
Definition: Types.h:126
@ kClassification
Definition: Types.h:127
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
Definition: Types.h:145
@ kTraining
Definition: Types.h:143
@ kTesting
Definition: Types.h:144
@ kDEBUG
Definition: Types.h:56
@ kINFO
Definition: Types.h:58
@ kWARNING
Definition: Types.h:59
@ kFATAL
Definition: Types.h:61
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:164
virtual void Delete(Option_t *option="")
Delete this object.
Definition: TObject.cxx:171
Random number generator class based on M.
Definition: TRandom3.h:27
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law.
Definition: TRandom.cxx:454
Basic string class.
Definition: TString.h:136
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1155
A TTree represents a columnar dataset.
Definition: TTree.h:79
RooCmdArg Timer(bool flag=true)
void GetMethodName(TString &name, TKey *mkey)
Definition: tmvaglob.cxx:342
create variable transformations
Config & gConfig()
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:148
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculates a gaussian function with mean and sigma.
Definition: TMath.cxx:471
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition: TMathBase.h:250
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Definition: TMath.h:707
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition: TMath.h:754
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition: TMathBase.h:198
TMarker m
Definition: textangle.C:8
double epsilon
Definition: triangle.c:618