   // constructor (training): boosted-method title and options come from the caller
   , fDetailedMonitoring(kFALSE)
   , fBaggedSampleFraction(0)
   , fBoostedMethodTitle(methodTitle)
   , fBoostedMethodOptions(theOption)
   , fMonitorBoostedMethod(kFALSE)
   , fOverlap_integral(0.0)

   // constructor (from weight file): title and options start out empty
   , fDetailedMonitoring(kFALSE)
   , fBaggedSampleFraction(0)
   , fBoostedMethodTitle("")
   , fBoostedMethodOptions("")
   , fMonitorBoostedMethod(kFALSE)
   , fOverlap_integral(0.0)
   fMethodWeight.clear();

   fTrainSigMVAHist.clear();
   fTrainBgdMVAHist.clear();
   fBTrainSigMVAHist.clear();
   fBTrainBgdMVAHist.clear();
   fTestSigMVAHist.clear();
   fTestBgdMVAHist.clear();
   DeclareOptionRef( fBoostNum = 1, "Boost_Num",
                     "Number of times the classifier is boosted" );

   DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
                     "Write monitoring histograms for each boosted classifier" );

   DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
                     "Produce histograms for detailed boost monitoring" );

   DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type",
                     "Boosting type for the classifiers" );
   AddPreDefVal(TString("RealAdaBoost"));
   AddPreDefVal(TString("AdaBoost"));
   AddPreDefVal(TString("Bagging"));

   DeclareOptionRef( fBaggedSampleFraction = 0.6, "Boost_BaggedSampleFraction",
                     "Relative size of the bagged event sample to the original size of the data sample (used whenever bagging is applied)" );

   DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
                     "The AdaBoost parameter that sets the effect of every boost step on the events' weights" );

   DeclareOptionRef( fTransformString = "step", "Boost_Transform",
                     "Type of transform applied to every boosted method: linear, log, step" );
   AddPreDefVal(TString("step"));
   AddPreDefVal(TString("linear"));
   AddPreDefVal(TString("log"));
   AddPreDefVal(TString("gauss"));

   DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
                     "Seed for the random number generator used for bagging" );

   // historic options, kept for backward compatibility of the reader
   DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
                     "How to set the final weight of the boosted classifiers" );
   AddPreDefVal(TString("ByError"));
   AddPreDefVal(TString("Average"));
   AddPreDefVal(TString("ByROC"));
   AddPreDefVal(TString("ByOverlap"));
   AddPreDefVal(TString("LastMethod"));

   DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
                     "Type of transform applied to every boosted method: linear, log, step" );
   AddPreDefVal(TString("step"));
   AddPreDefVal(TString("linear"));
   AddPreDefVal(TString("log"));
   AddPreDefVal(TString("gauss"));
   AddPreDefVal(TString("HighEdgeGauss"));
   AddPreDefVal(TString("HighEdgeCoPara"));

   DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
                     "Recalculate the classifier MVA signal-like cut at every boost iteration" );
   fBoostedMethodTitle   = methodTitle;
   fBoostedMethodOptions = theOption;
   results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
   results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
   results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
   if (fDetailedMonitoring){
      results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
      results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
      results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
      results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
      results->Store(new TH1F("OverlapIntegral_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
   }
   if (fDetailedMonitoring){
      // ...
      results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
      // ...
      results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
   }
   fMonitorTree = new TTree("MonitorBoost","Boost variables");
   fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
   fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
   fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
   fMonitorBoostedMethod = kTRUE;
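// Usage sketch (added; the output file name and tree location are assumptions,
// not taken from this file): after training, the monitoring tree can be
// inspected interactively, for instance
//
//    TFile f("TMVA.root");                        // hypothetical output file
//    TTree* t = (TTree*) f.Get("MonitorBoost");
//    t->Draw("errorFraction:iMethod");            // error fraction per boost step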
   Log() << kDEBUG << "CheckSetup: fBoostType=" << fBoostType << Endl;
   Log() << kDEBUG << "CheckSetup: fAdaBoostBeta=" << fAdaBoostBeta << Endl;
   Log() << kDEBUG << "CheckSetup: fBoostWeight=" << fBoostWeight << Endl;
   Log() << kDEBUG << "CheckSetup: fMethodError=" << fMethodError << Endl;
   Log() << kDEBUG << "CheckSetup: fBoostNum=" << fBoostNum << Endl;
   Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed << Endl;
   Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()=" << fTrainSigMVAHist.size() << Endl;
   Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()=" << fTestSigMVAHist.size() << Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod ? "true" : "false") << Endl;
   Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title=" << fBoostedMethodTitle << Endl;
   Log() << kDEBUG << "CheckSetup: MOptions=" << fBoostedMethodOptions << Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree << Endl;
   Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" << fCurrentMethodIdx << Endl;
   if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]=" << fMethods[0] << Endl;
   Log() << kDEBUG << "CheckSetup: fMethodWeight.size()=" << fMethodWeight.size() << Endl;
   if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]=" << fMethodWeight[0] << Endl;
   Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
   if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;

   if (fMethods.size() > 0) fMethods.clear();
   fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);

   Log() << kINFO << "Training " << fBoostNum << " " << fBoostedMethodName
         << " classifiers with title " << fBoostedMethodTitle << " ... patience please" << Endl;
   Timer timer( fBoostNum, GetName() );
   // strip a possible "~VarTransform=..." token from the boosted method's options
   Ssiz_t varTrafoStart = fBoostedMethodOptions.Index("~VarTransform=");
   if (varTrafoStart > 0) {
      Ssiz_t varTrafoEnd = fBoostedMethodOptions.Index(":", varTrafoStart);
      if (varTrafoEnd < varTrafoStart)
         varTrafoEnd = fBoostedMethodOptions.Length();
      fBoostedMethodOptions.Remove(varTrafoStart, varTrafoEnd - varTrafoStart);
   }
   // book and set up one classifier per boost step
   for (fCurrentMethodIdx=0; fCurrentMethodIdx<fBoostNum; fCurrentMethodIdx++) {

      IMethod* method = ClassifierFactory::Instance().Create(
         fBoostedMethodName.Data(), GetJobName(),
         Form("%s_B%04i", fBoostedMethodTitle.Data(), fCurrentMethodIdx),
         DataInfo(), fBoostedMethodOptions);

      fCurrentMethod = dynamic_cast<MethodBase*>(method);

      if (fCurrentMethod==0) {
         Log() << kFATAL << "Booking of the " << fCurrentMethodIdx
               << "-th classifier failed" << Endl;
      }
      // special handling for category methods
      if (fCurrentMethod->GetMethodType() == Types::kCategory) {
         MethodCategory* methCat = dynamic_cast<MethodCategory*>(fCurrentMethod);
         if (!methCat)
            Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /MethodBoost" << Endl;
         // ...
      }

      fCurrentMethod->SetMsgType(kWARNING);
      fCurrentMethod->SetupMethod();
      fCurrentMethod->ParseOptions();
      fCurrentMethod->SetAnalysisType( GetAnalysisType() );
      fCurrentMethod->ProcessSetup();
      fCurrentMethod->CheckSetup();

      // route the transformations through the boost method's own handler
      fCurrentMethod->RerouteTransformationHandler(&(this->GetTransformationHandler()));
      // create (or reuse) the directory for this classifier's monitoring histograms
      if (fMonitorBoostedMethod) {
         methodDir = GetFile()->GetDirectory(
            dirName = Form("%s_B%04i", fBoostedMethodName.Data(), fCurrentMethodIdx));
         if (methodDir==0) {
            methodDir = BaseDir()->mkdir(
               dirName, dirTitle = Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(), fCurrentMethodIdx));
         }
         fCurrentMethod->SetMethodDir(methodDir);
         fCurrentMethod->BaseDir()->cd();
      }
      if (fBoostType=="Bagging") Bagging();   // the first classifier is also trained on a bagged sample

      if (!IsSilentFile()) fCurrentMethod->WriteMonitoringHistosToFile();

      if (!IsSilentFile())
         if (fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
      // boost the method and fill the monitoring histograms/tree
      SingleBoost(fCurrentMethod);

      if (fDetailedMonitoring) {
         // ...
      }
      fMonitorTree->Fill();

      // stop boosting if the error rate of the last classifier reaches 0.5:
      // at that point the classifier is no better than random guessing, and
      // the AdaBoost weight of any further classifier would vanish
      Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
      if (fMethodError > 0.49999) StopCounter++;
      if (StopCounter > 0 && fBoostType != "Bagging") {
         fBoostNum = fCurrentMethodIdx+1;
         Log() << kINFO << "Error rate has reached 0.5 (" << fMethodError
               << "); boosting process stopped at classifier #" << fBoostNum << Endl;
         Log() << kINFO << "The classifier might be too strong to boost with Beta = "
               << fAdaBoostBeta << "; try reducing it." << Endl;
      }
   // fill the ClassifierWeight monitoring histogram with the normalized method weights
   Timer* timer1 = new Timer( fBoostNum, GetName() );
   for (fCurrentMethodIdx=0; fCurrentMethodIdx<fBoostNum; fCurrentMethodIdx++) {
      // ...
      if (fCurrentMethodIdx==fBoostNum) {
         // ...
      }
      TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
      if (tmp) tmp->SetBinContent(fCurrentMethodIdx+1, fMethodWeight[fCurrentMethodIdx]);
   }
   // ...
   if (fMethods.size()==1) fMethodWeight[0] = 1.0;
   fBoostedMethodOptions = GetOptions();
   // ...
   if (fBoostNum <= 0)
      Log() << kFATAL << "CreateHistograms called before fBoostNum is initialized" << Endl;
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   // compute mean, RMS, and value range of the MVA distribution
   gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
   for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
      fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S",        fNbins, xmin, xmax ) );
      fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B",        fNbins, xmin, xmax ) );
      fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
      fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
      fTestSigMVAHist  .push_back( new TH1F( Form("MVA_Test_S%04i", imtd),  "MVA_Test_S",         fNbins, xmin, xmax ) );
      fTestBgdMVAHist  .push_back( new TH1F( Form("MVA_Test_B%04i", imtd),  "MVA_Test_B",         fNbins, xmin, xmax ) );
   }
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event *ev = Data()->GetEvent(ievt);
      // ...
   }

   if (fMonitorBoostedMethod) {
      for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
         // ...
         fTrainSigMVAHist[imtd]->SetDirectory(dir);
         fTrainSigMVAHist[imtd]->Write();
         fTrainBgdMVAHist[imtd]->SetDirectory(dir);
         fTrainBgdMVAHist[imtd]->Write();
         fBTrainSigMVAHist[imtd]->SetDirectory(dir);
         fBTrainSigMVAHist[imtd]->Write();
         fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
         fBTrainBgdMVAHist[imtd]->Write();
      }
   }

   fMonitorTree->Write();
   if (fMonitorBoostedMethod) {
      UInt_t nloop = fTestSigMVAHist.size();
      if (fMethods.size()<nloop) nloop = fMethods.size();
      // ...
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = GetEvent(ievt);
         Float_t w = ev->GetWeight();
         if (DataInfo().IsSignal(ev)) {
            for (UInt_t imtd=0; imtd<nloop; imtd++) {
               fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
            }
         }
         else {
            for (UInt_t imtd=0; imtd<nloop; imtd++) {
               fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
            }
         }
      }
   }
   UInt_t nloop = fTestSigMVAHist.size();
   if (fMethods.size()<nloop) nloop = fMethods.size();
   if (fMonitorBoostedMethod) {
      for (UInt_t imtd=0; imtd<nloop; imtd++) {
         // ...
         if (dir==0) continue;
         fTestSigMVAHist[imtd]->SetDirectory(dir);
         fTestSigMVAHist[imtd]->Write();
         fTestBgdMVAHist[imtd]->SetDirectory(dir);
         fTestBgdMVAHist[imtd]->Write();
      }
   }
   if (IsModelPersistence()) {
      TString _fFileDir = DataInfo().GetName();
      // ...
   }
   // determine the MVA value range over the training sample
   const Int_t nBins = 10001;
   // ...
   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
      Double_t val = fMVAvalues->at(ievt);
      if (val>maxMVA) maxMVA = val;
      if (val<minMVA) minMVA = val;
   }
   maxMVA = maxMVA + (maxMVA-minMVA)/nBins;   // ensure the maximum falls inside the last bin

   TH1D *mvaS  = new TH1D( Form("MVAS_%d", fCurrentMethodIdx), "", nBins, minMVA, maxMVA );
   TH1D *mvaB  = new TH1D( Form("MVAB_%d", fCurrentMethodIdx), "", nBins, minMVA, maxMVA );
   TH1D *mvaSC = new TH1D( Form("MVASC_%d",fCurrentMethodIdx), "", nBins, minMVA, maxMVA );
   TH1D *mvaBC = new TH1D( Form("MVABC_%d",fCurrentMethodIdx), "", nBins, minMVA, maxMVA );
   if (fDetailedMonitoring){
      results->Store(mvaS,  Form("MVAS_%d", fCurrentMethodIdx));
      results->Store(mvaB,  Form("MVAB_%d", fCurrentMethodIdx));
      results->Store(mvaSC, Form("MVASC_%d",fCurrentMethodIdx));
      results->Store(mvaBC, Form("MVABC_%d",fCurrentMethodIdx));
   }

   // fill the signal/background MVA histograms used for the cut scan
   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
      Double_t weight = GetEvent(ievt)->GetWeight();
      Double_t mvaVal = fMVAvalues->at(ievt);
      if (DataInfo().IsSignal(GetEvent(ievt))){
         mvaS->Fill(mvaVal,weight);
      }
      else {
         mvaB->Fill(mvaVal,weight);
      }
   }
   // scan all bins for the cut that maximizes the separation gain
   for (Int_t ibin=1; ibin<=nBins; ibin++){
      // ...
      if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)) {
         // ...
      }
   }

   // decide on the cut orientation: is the signal-like region above or below the cut?
   if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation = -1;
   else                                     mvaCutOrientation =  1;

   // ...
   Log() << kDEBUG
         << " s2=" << (sTot-sSelCut)
         << " b2=" << (bTot-bSelCut)
         << " s/b(1)=" << sSelCut/bSelCut
         << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
         << " index before cut=" << parentIndex
         << " after: left=" << leftIndex
         << " after: right=" << rightIndex
         << " sepGain=" << parentIndex - ( (sSelCut+bSelCut)*leftIndex + (sTot-sSelCut+bTot-bSelCut)*rightIndex )/(sTot+bTot)
         << " sepGain=" << separationGain
         // ...
         << " idx=" << fCurrentMethodIdx
         << " cutOrientation=" << mvaCutOrientation
         << Endl;
   Double_t returnVal = -1;

   if      (fBoostType=="AdaBoost")     returnVal = this->AdaBoost(method, 1);   // discrete AdaBoost
   else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost(method, 0);   // real AdaBoost
   else if (fBoostType=="Bagging")      returnVal = this->Bagging();
   else
      Log() << kFATAL << "<Boost> unknown boost option " << fBoostType << " called" << Endl;

   fMethodWeight.push_back(returnVal);
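// Minimal sketch of the bagging branch (added for illustration; this is not
// the original Bagging() implementation): each event receives a new
// Poisson-distributed boost weight, which emulates a bootstrap resampling of
// the training sample. The mean of the Poisson law is steered by
// fBaggedSampleFraction (option Boost_BaggedSampleFraction).
//
//    TRandom3 rng(fRandomSeed);
//    for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
//       const Event* ev = Data()->GetEvent(ievt);
//       ev->SetBoostWeight( rng.PoissonD(fBaggedSampleFraction) );
//    }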
   Log() << kWARNING << "AdaBoost called without classifier reference - needed for calculating AdaBoost" << Endl;
   if (discreteAdaBoost) {
      // discrete AdaBoost: determine the MVA cut that classifies an event as signal-like
      // ...
   }

   for (Long64_t evt=0; evt<GetNEvents(); evt++) {
      const Event* ev = Data()->GetEvent(evt);
      // ...
   }

   Bool_t* WrongDetection = new Bool_t[GetNEvents()];
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt] = kTRUE;

   // find the misclassified events and accumulate their total weight
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      sig = DataInfo().IsSignal(ev);
      v   = fMVAvalues->at(ievt);
      w   = ev->GetWeight();

      if (fMonitorBoostedMethod) {
         if (sig) fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
         else     fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
      }

      if (discreteAdaBoost){
         if (sig == method->IsSignalLike(v))
            WrongDetection[ievt] = kFALSE;
         else
            WrongDetection[ievt] = kTRUE;
      }
      else {
         // real AdaBoost: weight the error by the signed, rescaled MVA probability
         Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)v);
         mvaProb = 2*(mvaProb-0.5);
         Int_t trueType = -1;
         if (DataInfo().IsSignal(ev)) trueType = 1;
         sumWrong += w*trueType*mvaProb;
      }
   }
   fMethodError = sumWrong/sumAll;
   // calculate the boost weight from the error fraction
   if (fMethodError == 0) {
      Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done" << Endl;
   }
   else {
      if (discreteAdaBoost)
         boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
      else
         boostWeight = TMath::Log((1.+fMethodError)/(1.-fMethodError))*fAdaBoostBeta;
   }
   // reweight the events according to the boost weight
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = Data()->GetEvent(ievt);

      if (discreteAdaBoost){
         // events are classified as signal OR background, right or wrong
         if (WrongDetection[ievt] && boostWeight != 0) {
            // ... (misclassified events get their boost weight scaled up)
         }
      }
      else {
         // real AdaBoost: scale by the signed, rescaled MVA probability
         Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
         mvaProb = 2*(mvaProb-0.5);
         Int_t trueType = -1;
         if (DataInfo().IsSignal(ev)) trueType = 1;
         Double_t boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
         ev->ScaleBoostWeight(boostfactor);
      }
   }

   // renormalize so that the sum of event weights stays unchanged
   Double_t normWeight = oldSum/newSum;
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = Data()->GetEvent(ievt);
      ev->ScaleBoostWeight(normWeight);
   }
   // ...
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = Data()->GetEvent(ievt);
      // ...
   }

   delete[] WrongDetection;
   if (MVAProb) delete MVAProb;

   fBoostWeight = boostWeight;
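// Added note: the weights computed above are the textbook AdaBoost update.
// In LaTeX, with \varepsilon = fMethodError and \beta = fAdaBoostBeta:
//    discrete AdaBoost:  \alpha = \beta \, \ln\frac{1-\varepsilon}{\varepsilon},
//                        w_i \to w_i \, e^{\alpha} for misclassified events;
//    real AdaBoost:      \alpha = \beta \, \ln\frac{1+\varepsilon}{1-\varepsilon},
//                        w_i \to w_i \, e^{-\alpha y_i \hat p_i}, with y_i = \pm 1
//                        and \hat p_i = 2(P_i - 0.5) the rescaled MVA probability.
// A subsequent global rescaling keeps the sum of the event weights constant.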
   // reset the boosted weights of all events back to 1
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = Data()->GetEvent(ievt);
      ev->SetBoostWeight(1.0);
   }
   Log() << "This method combines several classifiers of one species in a " << Endl;
   Log() << "single multivariate quantity via the boost algorithm." << Endl;
   Log() << "The output is a weighted sum over all individual classifiers." << Endl;
   Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified in the previous tree a larger " << Endl;
   Log() << "weight in the training of the following classifier." << Endl;
   Log() << "Optionally, bagged boosting can also be applied." << Endl;
   Log() << Endl;
   Log() << "The most important parameters in the configuration are the " << Endl;
   Log() << "number of boosts applied (Boost_Num) and the choice of boosting " << Endl;
   Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
   Log() << "AdaBoosting: the most important parameter in this configuration " << Endl;
   Log() << "is the beta parameter (Boost_AdaBoostBeta)." << Endl;
   Log() << "When boosting a linear classifier, it is sometimes advantageous " << Endl;
   Log() << "to transform the MVA output non-linearly. The following options " << Endl;
   Log() << "are available: step (default), linear, log, and gauss." << Endl;
   Log() << Endl;
   Log() << "Some classifiers are hard to boost and do not improve much by " << Endl;
   Log() << "boosting; some even slightly deteriorate due to the boosting." << Endl;
   Log() << "The booking of the boost method is special since it requires " << Endl;
   Log() << "the booking of the method to be boosted and of the boost itself." << Endl;
   Log() << "This is solved by booking the method to be boosted and adding " << Endl;
   Log() << "all boost parameters, which begin with \"Boost_\", to the " << Endl;
   Log() << "options string. The factory separates the options and initiates " << Endl;
   Log() << "the boost process. The TMVA macro directory contains the example " << Endl;
   Log() << "macro \"Boost.C\"." << Endl;
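// Illustrative booking call (added; assembled from the help text above and the
// standard TMVA tutorials -- the variable names "factory"/"dataloader" and the
// chosen option values are assumptions, not part of this file):
//
//    factory->BookMethod( dataloader, TMVA::Types::kFisher, "BoostedFisher",
//       "H:!V:Boost_Num=20:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:"
//       "Boost_Transform=step" );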
   // compute the boosted MVA response as a weighted sum over all classifiers
   for (UInt_t i=0; i<fMethods.size(); i++){
      MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
      if (m==0) continue;
      Double_t val    = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
      Double_t sigcut = m->GetSignalReferenceCut();

      // apply the chosen transform to the individual MVA output
      if (fTransformString == "linear"){
         // no transformation
      }
      else if (fTransformString == "log"){
         if (val < sigcut) val = sigcut;
         val = TMath::Log((val-sigcut)+1.);
      }
      else if (fTransformString == "step"){
         if (m->IsSignalLike(val)) val =  1.;
         else                      val = -1.;
      }
      else if (fTransformString == "gauss"){
         val = TMath::Gaus((val-sigcut),1);
      }
      else {
         Log() << kFATAL << "error: unknown transformation " << fTransformString << Endl;
      }
      mvaValue += val*fMethodWeight[i];
      norm     += fMethodWeight[i];
   }
   mvaValue /= norm;

   // cannot determine the error
   NoErrorCalc(err, errUpper);
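// Added note: in formula form, the boosted response returned here is
//    y_{Boost}(x) = \frac{\sum_i w_i \, T(h_i(x))}{\sum_i w_i},
// where h_i is the i-th classifier's MVA output, T the Boost_Transform mapping
// ("step" by default), and w_i the method weights accumulated during boosting.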
   Data()->SetCurrentType(eTT);

   if (singleMethod && !method) {
      Log() << kFATAL << "Your method "
            << fMethods.back()->GetName()
            << " does not seem to be a proper TMVA method"
            << Endl;
   }

   // store and temporarily normalize the method weights
   std::vector<Double_t> OldMethodWeight(fMethodWeight);
   if (!singleMethod) {
      // sum up the weights of all boosted classifiers
      Double_t AllMethodsWeight = 0;
      for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
         AllMethodsWeight += fMethodWeight.at(i);
      if (AllMethodsWeight != 0.0) {
         for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
            fMethodWeight[i] /= AllMethodsWeight;
      }
   }

   // calculate the MVA values
   std::vector<Float_t>* mvaRes;
   if (singleMethod)
      mvaRes = fMVAvalues;
   else {
      mvaRes = new std::vector<Float_t>(GetNEvents());
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
      }
   }

   // restore the original method weights
   fMethodWeight = OldMethodWeight;

   // now create histograms to calculate the ROC integral
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );
   // histograms for the overlap integral
   TH1 *mva_s_overlap = 0, *mva_b_overlap = 0;
   if (CalcOverlapIntergral) {
      mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
      mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
   }
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      Float_t w = ev->GetWeight();
      if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
      else                         mva_b->Fill( (*mvaRes)[ievt], w );

      if (CalcOverlapIntergral) {
         Float_t w_ov = ev->GetWeight();
         if (DataInfo().IsSignal(ev))
            mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
         else
            mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
      }
   }

   // calculate the overlap integral
   if (CalcOverlapIntergral) {
      // ...
      fOverlap_integral = 0.0;
      for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
         Double_t bc_s = mva_s_overlap->GetBinContent(bin);
         Double_t bc_b = mva_b_overlap->GetBinContent(bin);
         if (bc_s > 0.0 && bc_b > 0.0)
            fOverlap_integral += TMath::Min(bc_s, bc_b);
      }

      delete mva_s_overlap;
      delete mva_b_overlap;
   }
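// Added note: up to the normalization of the two histograms, the sum above
// approximates the overlap integral of the signal and background MVA
// distributions. In LaTeX:
//    \int \min\big( \hat p_S(x), \hat p_B(x) \big) \, dx
// (1 = complete overlap, 0 = none); this is the quantity behind the historic
// Boost_MethodWeightType option "ByOverlap".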
   Log() << kFATAL << "dynamic cast to MethodBase* failed" << Endl;

   // ...
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      // ...
   }
   results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
   results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
   // ...
   Log() << kINFO << "<Train> average number of nodes before/after pruning: "
         // ...
         << Endl;
   if (methodIndex < 3){
      Log() << kDEBUG << "No detailed boost monitoring for "
            << GetCurrentMethod(methodIndex)->GetMethodName()
            << " yet available" << Endl;
   }
   if (fDetailedMonitoring){
      // monitor the event distributions for toy problems with exactly two variables
      if (DataInfo().GetNVariables() == 2) {
         results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),
                                 Form("EventDistSig_%d",methodIndex), 100,0,7, 100,0,7));
         results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),
                                 Form("EventDistBkg_%d",methodIndex), 100,0,7, 100,0,7));

         for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
            const Event* ev = GetEvent(ievt);
            // ...
            TH2* h = 0;
            if (DataInfo().IsSignal(ev))
               h = results->GetHist2D(Form("EventDistSig_%d",methodIndex));
            // ...
         }
      }
   }