using std::make_pair;

   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)
   DeclareOptionRef(fMinNodeSizeS=tmp, "MinNodeSize",
                    "Minimum percentage of training events required in a leaf node (default: Classification: 5%, Regression: 0.2%)");
   DeclareOptionRef(fNCuts, "nCuts",
                    "Number of grid points in variable range used in finding optimal cut in node splitting");
   DeclareOptionRef(fRandomisedTrees, "UseRandomisedTrees",
                    "Determine at each node splitting the cut variable only as the best out of a random subset of variables (like in RandomForests)");
   DeclareOptionRef(fUsePoissonNvars, "UsePoissonNvars",
                    "Interpret \"UseNvars\" not as a fixed number but as the mean of a Poisson distribution in each split with the RandomisedTrees option");
   DeclareOptionRef(fBaggedSampleFraction=.6, "BaggedSampleFraction",
                    "Relative size of the bagged event sample to the original size of the data sample (used whenever bagging is used, i.e. UseBaggedBoost, Bagging)");
                    "Use Sig or Bkg categories, or the purity=S/(S+B) as classification of the leaf node -> Real-AdaBoost");
   DeclareOptionRef(fNegWeightTreatment="InverseBoostNegWeights", "NegWeightTreatment",
                    "How to treat events with negative weights in the BDT training (in particular the boosting): IgnoreInTraining; boost with inverse boost weight; pair events with negative and positive weights in the training sample and *annihilate* them (experimental!)");
   DeclareOptionRef(fHuberQuantile = 0.7, "HuberQuantile",
                    "In the Huber loss function this is the quantile that separates the core from the tails in the residuals distribution.");
   DeclareOptionRef(fPruneMethodS, "PruneMethod",
                    "Note: for BDTs use small trees (e.g. MaxDepth=3) and NoPruning. Pruning: method used for pruning (removal) of statistically insignificant branches");
   DeclareOptionRef(fBaggedGradBoost=kFALSE, "UseBaggedGrad",
                    "deprecated: Use *UseBaggedBoost* instead: use only a random subsample of all events for growing the trees in each iteration.");
   DeclareOptionRef(fBaggedSampleFraction, "GradBaggingFraction",
                    "deprecated: Use *BaggedSampleFraction* instead: defines the fraction of events to be used in each iteration, e.g. when UseBaggedGrad=kTRUE.");
   DeclareOptionRef(fUseNTrainEvents, "UseNTrainEvents",
                    "deprecated: Use *BaggedSampleFraction* instead: number of randomly picked training events used in randomised (and bagged) trees");
                    "Use weighted trees or simple average in classification from the forest");
      Log() << kFATAL << "<ProcessOptions> Huber Quantile must be in range [0,1]. Value given, "
            << fHuberQuantile << ", does not match this criterion" << Endl;
            << "Sorry, automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;
            << "of events in a leaf node. This is DEPRECATED, please use the option \n"
            << "*MinNodeSize* giving the relative number as percentage of training \n"
            << "events instead. \n"

      Log() << kWARNING << "Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recommended \n"

   Log() << kINFO << "the option *InverseBoostNegWeights* does not exist for BoostType=Grad --> change" << Endl;
   Log() << kINFO << "to new default for GradBoost *Pray*" << Endl;
   Log() << kDEBUG << "i.e. simply keep them as is, which should work fine for Grad Boost" << Endl;
      Log() << kWARNING << "You have chosen to use more than half of your training sample "
            << "to optimize the automatic pruning algorithm. This is probably wasteful "
            << "and your overall results will be degraded. Are you sure you want this?"

   if (this->Data()->HasNegativeEventWeights()){
      Log() << kINFO << " You are using a Monte Carlo that has also negative weights. "
            << "That should in principle be fine as long as on average you end up with "
            << "something positive. For this you have to make sure that the minimal number "
            << "of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
            << ", (or the deprecated equivalent nEventsMin) you can set this via the "
            << "BDT option string when booking the "
            << "classifier) is large enough to allow for reasonable averaging!!! "
            << " If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
            << "which ignores events with negative weight in the training. " << Endl
            << Endl << "Note: You'll get a WARNING message during the training if that should ever happen" << Endl;
         Log() << kWARNING << "Regression Trees do not work with fUseYesNoLeaf=TRUE --> I will set it to FALSE" << Endl;

         Log() << kWARNING << "Regression Trees do not work with Separation type other than <RegressionVariance> --> I will use it instead" << Endl;

         Log() << kWARNING << "Sorry, UseFisherCuts is not available for regression analysis, I will ignore it!" << Endl;

         Log() << kWARNING << "Sorry, the option of nCuts<0 using a more elaborate node splitting algorithm " << Endl;
         Log() << kWARNING << "is not implemented for regression analysis ! " << Endl;
         Log() << kWARNING << "--> I switch to the default nCuts = 20 and use standard node splitting" << Endl;

      Log() << kINFO << " Randomised trees use no pruning" << Endl;

      Log() << kWARNING << "When using the option UseFisherCuts, the other option nCuts<0 (i.e. using" << Endl;
      Log() << " a more elaborate node splitting algorithm) is not implemented. " << Endl;
      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
            << " I set it to 1 .. just so that the program does not crash"

      Log() << kWARNING << " you specified the option NegWeightTreatment=PairNegWeightsGlobal : This option is still considered EXPERIMENTAL !! " << Endl;

            << "* \n this has been translated to MaxDepth=" << fMaxDepth << Endl;

      Log() << kWARNING << "You have specified a deprecated option *UseBaggedGrad* --> please use *UseBaggedBoost* instead" << Endl;
   if (sizeInPercent > 0 && sizeInPercent < 50){

      Log() << kFATAL << "you have demanded a minimal node size of "
            << sizeInPercent << "% of the training events.. \n"
            << " that somehow does not make sense " << Endl;

      Log() << kFATAL << "I had problems reading the option MinNodeEvents, which "
            << "after removing a possible % sign now reads " << sizeInPercent << Endl;

   Log() << kDEBUG << " successfully(?) reset the method " << Endl;
      std::vector<const TMVA::Event*> tmpEventSample;
      for (Long64_t ievt=0; ievt<nevents; ievt++) {
         tmpEventSample.push_back(event);

      for (UInt_t i=0; i<tmpEventSample.size(); i++) delete tmpEventSample[i];

      for (Long64_t ievt=0; ievt<nevents; ievt++) {

            if (firstNegWeight) {
               Log() << kWARNING << " Note, you have events with negative event weight in the sample, but you've chosen to ignore them" << Endl;

         } else if (event->GetWeight()==0){
            if (firstZeroWeight) {
               Log() << "Events with weight == 0 are going to be simply ignored " << Endl;

            if (event->GetWeight() < 0) {
                     Log() << kWARNING << "Events with negative event weights are found and "
                           << " will be removed prior to the actual BDT training by global "
                           << " pairing (and subsequent annihilation) with positive weight events"

                     Log() << kWARNING << "Events with negative event weights are USED during "
                           << "the BDT training. This might cause problems with small node sizes "
                           << "or with the boosting. Please remove negative events from training "
                           << "using the option *IgnoreEventsWithNegWeightsInTraining* in case you "
                           << "observe problems with the boosting"

               Int_t imodulo = static_cast<Int_t>( fmod(modulo,1.0) > 0.5 ? ceil(modulo) : floor(modulo) );

               << "% of training used for validation)" << Endl;

      Log() << kDEBUG << "\t<InitEventSample> For classification trees, " << Endl;
      Log() << kDEBUG << " \tthe effective number of backgrounds is scaled to match " << Endl;
      Log() << kDEBUG << " \tthe signal. Otherwise the first boosting step would do 'just that'!" << Endl;
   Int_t sumSig=0, sumBkg=0;

   if (sumSigW && sumBkgW){
      Log() << kDEBUG << "\tre-normalise events such that Sig and Bkg have respective sum of weights = "
      Log() << kDEBUG << "  \tsig->sig*" << normSig << "ev. bkg->bkg*" << normBkg << "ev." << Endl;
      Log() << kHEADER << "#events: (reweighted) sig: " << sumSigW*normSig << " bkg: " << sumBkgW*normBkg << Endl;
      Log() << kINFO << "#events: (unweighted) sig: " << sumSig << " bkg: " << sumBkg << Endl;
      for (Long64_t ievt=0; ievt<nevents; ievt++) {

      Log() << kINFO << "--> could not determine scaling factors as either there are " << Endl;
      Log() << kINFO << " no signal events (sumSigW=" << sumSigW << ") or no bkg ev. (sumBkgW=" << sumBkgW << ")" << Endl;
   std::vector<const Event*> negEvents;

   if (totalNegWeights == 0 ) {
      Log() << kINFO << "no negative event weights found .. no preprocessing necessary" << Endl;

      Log() << kINFO << "found a total of " << totalNegWeights << " of negative event weights which I am going to try to pair with positive events to annihilate them" << Endl;
      Log() << kINFO << "found a total of " << totalPosWeights << " of events with positive weights" << Endl;
      Log() << kINFO << "--> total sum of weights = " << totalWeights << " = " << totalNegWeights+totalPosWeights << Endl;

   for (Int_t i=0; i<2; i++){
      invCov = ((*cov)[i]);

         std::cout << "<MethodBDT::PreProcessNeg...> matrix is almost singular with determinant="
                   << " did you use variables that are linear combinations or highly correlated?"

         std::cout << "<MethodBDT::PreProcessNeg...> matrix is singular with determinant="
                   << " did you use variables that are linear combinations?"

   Log() << kINFO << "Found a total of " << totalNegWeights << " in negative weights out of " << fEventSample.size() << " training events " << Endl;
   Timer timer(negEvents.size(),"Negative Event paired");

   for (UInt_t nev = 0; nev < negEvents.size(); nev++){
      timer.DrawProgressBar( nev );
      Double_t weight = negEvents[nev]->GetWeight();
      UInt_t iClassID = negEvents[nev]->GetClass();
      invCov = ((*cov)[iClassID]);
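      // Pairing step: for each negative-weight event, search for the closest positive-weight
      // event using a Mahalanobis-type distance, (x-y)^T * invCov * (x-y), built from the
      // inverse covariance matrix of the input variables for that event's class.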
                     dist += (negEvents[nev]->GetValue(ivar)-fEventSample[iev]->GetValue(ivar))*
                        (*invCov)[ivar][jvar]*
                        (negEvents[nev]->GetValue(jvar)-fEventSample[iev]->GetValue(jvar));

               if (dist < minDist) { iMin=iev; minDist=dist;}

               negEvents[nev]->SetBoostWeight( 0 );

               negEvents[nev]->SetBoostWeight( newWeight/negEvents[nev]->GetOriginalWeight() );

         } else Log() << kFATAL << "preprocessing didn't find an event to pair with the negative weight ... probably a bug" << Endl;
         weight = negEvents[nev]->GetWeight();

   Log() << kINFO << "<Negative Event Pairing> took: " << timer.GetElapsedTime()

   totalNegWeights = 0;
   totalPosWeights = 0;
   std::vector<const Event*> newEventSample;

   if (totalNegWeights < 0) Log() << kFATAL << " compensation of negative event weights with positive ones did not work " << totalNegWeights << Endl;

   Log() << kINFO << " after PreProcessing, the Event sample is left with " << fEventSample.size() << " events (unweighted), all with positive weights, adding up to " << totalWeights << Endl;
   Log() << kINFO << " nSig=" << nSig << " sigWeight=" << sigWeight << " nBkg=" << nBkg << " bkgWeight=" << bkgWeight << Endl;
   std::map<TString,TMVA::Interval*> tuneParameters;
   std::map<TString,Double_t> tunedParameters;

   tuneParameters.insert(std::pair<TString,Interval*>("NTrees",      new Interval(10,1000,5)));
   tuneParameters.insert(std::pair<TString,Interval*>("MaxDepth",    new Interval(2,4,3)));
   tuneParameters.insert(std::pair<TString,Interval*>("MinNodeSize", new LogInterval(1,30,30)));

      tuneParameters.insert(std::pair<TString,Interval*>("AdaBoostBeta", new Interval(.2,1.,5)));

      tuneParameters.insert(std::pair<TString,Interval*>("Shrinkage",    new Interval(0.05,0.50,5)));

      tuneParameters.insert(std::pair<TString,Interval*>("UseNvars",     new Interval(min_var,max_var,4)));

   Log() << kINFO << " the following BDT parameters will be tuned on the respective *grid*\n" << Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
   for (it=tuneParameters.begin(); it!=tuneParameters.end(); it++){
      std::ostringstream oss;
      (it->second)->Print(oss);

   tunedParameters = optimize.optimize();

   return tunedParameters;

   std::map<TString,Double_t>::iterator it;
   for (it=tuneParameters.begin(); it!=tuneParameters.end(); it++){

      else if (it->first == "MinNodeSize") SetMinNodeSize(it->second);

      else if (it->first == "Shrinkage")   SetShrinkage(it->second);

      else Log() << kFATAL << " SetParameter for " << it->first << " not yet implemented " << Endl;
      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
            << " I set it to 1 .. just so that the program does not crash"

   std::vector<TString> titles = {"Boost weight", "Error Fraction"};

                             << "please remove the option from the configuration string, or "
                             << "use \"!Normalise\""

   TString hname = "AdaBoost weight distribution";

      hname = "Boost event weights distribution";

      results->Store(h, "BoostWeights");

         TH2* boostMonitor = new TH2F("BoostMonitor","ROC Integral Vs iTree",2,0,fNTrees,2,0,1.05);
         boostMonitor->SetYTitle("ROC Integral");
         results->Store(boostMonitor, "BoostMonitor");

         boostMonitorGraph->SetName("BoostMonitorGraph");
         boostMonitorGraph->SetTitle("ROCIntegralVsNTrees");
         results->Store(boostMonitorGraph, "BoostMonitorGraph");

      results->Store(h, "BoostWeightsVsTree");

      results->Store(h, "ErrorFrac");

      nodesBeforePruningVsTree->SetXTitle("#tree");
      nodesBeforePruningVsTree->SetYTitle("#tree nodes");
      results->Store(nodesBeforePruningVsTree);

      nodesAfterPruningVsTree->SetXTitle("#tree");
      nodesAfterPruningVsTree->SetYTitle("#tree nodes");
      results->Store(nodesAfterPruningVsTree);

   Int_t nNodesBeforePruningCount = 0;
   Int_t nNodesAfterPruningCount = 0;

   Int_t nNodesBeforePruning = 0;
   Int_t nNodesAfterPruning = 0;
   while (itree < fNTrees && continueBoost){

            Log() << kFATAL << "Multiclass is currently only supported by gradient boost. "
                  << "Please change boost option accordingly (GradBoost)."

         for (UInt_t i=0;i<nClasses;i++){

               fForest.back()->SetUseFisherCuts();

            fForest.back()->SetUseFisherCuts();

            nNodesBeforePruning = fForest.back()->CleanTree();

         nNodesBeforePruningCount += nNodesBeforePruning;
         nodesBeforePruningVsTree->SetBinContent(itree+1,nNodesBeforePruning);

         std::vector<const Event*> * validationSample = NULL;

         nNodesAfterPruning = fForest.back()->GetNNodes();
         nNodesAfterPruningCount += nNodesAfterPruning;
         nodesAfterPruningVsTree->SetBinContent(itree+1,nNodesAfterPruning);

               if (  itree==fNTrees-1 || (!(itree%500)) ||
                     (!(itree%250) && itree < 1000) ||
                     (!(itree%100) && itree <  500) ||
                     (!(itree%50)  && itree <  250) ||
                     (!(itree%25)  && itree <  150) ||
                     (!(itree%10)  && itree <   50) ||
                     (!(itree%5)   && itree <   20)

      Log() << kDEBUG << "\t<Train> average number of nodes (w/o pruning) : "

      Log() << kDEBUG << "\t<Train> average number of nodes before/after pruning : "
            << nNodesBeforePruningCount/GetNTrees() << " / "

   Log() << kDEBUG << "Now I delete the private data sample" << Endl;
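   // GetGradBoostMVA: sum the per-tree GradBoost responses for the event and map the sum onto
   // [-1,1] with the sigmoid 2/(1+exp(-2*sum)) - 1.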
   for (UInt_t itree=0; itree<nTrees; itree++) {

   return 2.0/(1.0+exp(-2.0*sum))-1;

      for (std::vector<const TMVA::Event*>::iterator e=eventSample.begin(); e!=eventSample.end(); e++) {
         if (cls == nClasses-1){
            for (UInt_t i=0;i<nClasses;i++){
               for (UInt_t j=0;j<nClasses;j++){

               Double_t res = ((*e)->GetClass()==i) ? (1.0-p_cls) : (-p_cls);

      for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {
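   // GradBoost leaf update: for each leaf, accumulate the weighted sum of event residuals
   // (numerator) and the sum of |t|*(1-|t|)*w^2 (denominator) from which the leaf response is
   // derived; the denominator is clamped to >= 1e-30 to avoid division by zero.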
   std::map<TMVA::DecisionTreeNode*,std::vector<Double_t> > leaves;
   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {
      Double_t weight = (*e)->GetWeight();

      if ((leaves[node]).empty()){
         (leaves[node]).push_back((*e)->GetTarget(cls)* weight);
         (leaves[node]).push_back(fabs((*e)->GetTarget(cls))*(1.0-fabs((*e)->GetTarget(cls))) * weight* weight);

         (leaves[node])[0]+=((*e)->GetTarget(cls)* weight);
         (leaves[node])[1]+=fabs((*e)->GetTarget(cls))*(1.0-fabs((*e)->GetTarget(cls))) * weight* weight;

        iLeave!=leaves.end();++iLeave){
      if ((iLeave->second)[1]<1e-30) (iLeave->second)[1]=1e-30;

   std::map<TMVA::DecisionTreeNode*,vector< TMVA::LossFunctionEventInfo > > leaves;
   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

   for (std::map<TMVA::DecisionTreeNode*,vector< TMVA::LossFunctionEventInfo > >::iterator iLeave=leaves.begin();
        iLeave!=leaves.end();++iLeave){
      (iLeave->first)->SetResponse(fShrinkage*fit);

      for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {
         for (UInt_t i=0;i<nClasses;i++){
            Double_t r = (*e)->GetClass()==i ? (1-1.0/nClasses) : (-1.0/nClasses);

      for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {
   return ncorrect / (ncorrect + nfalse);

         returnVal = this->GradBoost (eventSample, dt, cls);

         returnVal = this->GradBoost (eventSample, dt);

   TH1F *tmpS = new TH1F( "tmpS", "", 100, -1., 1.00001 );
   TH1F *tmpB = new TH1F( "tmpB", "", 100, -1., 1.00001 );

   for (UInt_t iev=0; iev < nevents; iev++){

      if (event->GetClass() == signalClassNr) {tmp=tmpS;}

   std::vector<TH1F*> hS;
   std::vector<TH1F*> hB;

      hS.push_back(new TH1F(Form("SigVar%dAtTree%d",ivar,iTree),Form("SigVar%dAtTree%d",ivar,iTree),
                            100,DataInfo().GetVariableInfo(ivar).GetMin(),DataInfo().GetVariableInfo(ivar).GetMax()));
      hB.push_back(new TH1F(Form("BkgVar%dAtTree%d",ivar,iTree),Form("BkgVar%dAtTree%d",ivar,iTree),
                            100,DataInfo().GetVariableInfo(ivar).GetMin(),DataInfo().GetVariableInfo(ivar).GetMax()));
      results->Store(hS.back(),hS.back()->GetTitle());
      results->Store(hB.back(),hB.back()->GetTitle());

   TH1F *tmpBoostWeightsS = new TH1F(Form("BoostWeightsInTreeS%d",iTree),Form("BoostWeightsInTreeS%d",iTree),100,0.,max);
   TH1F *tmpBoostWeightsB = new TH1F(Form("BoostWeightsInTreeB%d",iTree),Form("BoostWeightsInTreeB%d",iTree),100,0.,max);
   results->Store(tmpBoostWeightsS,tmpBoostWeightsS->GetTitle());
   results->Store(tmpBoostWeightsB,tmpBoostWeightsB->GetTitle());

   TH1F *tmpBoostWeights;
   std::vector<TH1F*> *h;

         tmpBoostWeights=tmpBoostWeightsS;

         tmpBoostWeights=tmpBoostWeightsB;
   Double_t err=0, sumGlobalw=0, sumGlobalwfalse=0, sumGlobalwfalse2=0;

   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);
   std::map<Node*,Int_t> sigEventsInNode;
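   // AdaBoost, first pass over the sample: accumulate the total weight and the weight of
   // misclassified events (or the deviation-based loss for regression), which define the
   // error rate err used below to derive the boost weight.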
   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      UInt_t iclass=(*e)->GetClass();

         sumGlobalwfalse += w * tmpDev;
         sumGlobalwfalse2 += w * tmpDev*tmpDev;
         if (tmpDev > maxDev) maxDev = tmpDev;

            if (!(isSignalType == DataInfo().IsSignal(*e))) {
               sumGlobalwfalse+= w;

            sumGlobalwfalse+= w*trueType*dtoutput;

   err = sumGlobalwfalse/sumGlobalw ;

         err = sumGlobalwfalse/maxDev/sumGlobalw ;

         err = sumGlobalwfalse2/maxDev/maxDev/sumGlobalw ;

         for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

            err += w * (1 - exp (-tmpDev/maxDev)) / sumGlobalw;

         Log() << kFATAL << " you've chosen a Loss type for AdaBoost other than linear, quadratic or exponential "
               << "and this is not implemented... a typo in the options ??" << Endl;

   Log() << kDEBUG << "BDT AdaBoost  wrong/all: " << sumGlobalwfalse << "/" << sumGlobalw << Endl;
   std::vector<Double_t> newSumw(sumw.size(),0);

         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               << "please check why this happens, maybe too many events per node requested ?"

         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. (" << err
               << ") That should not happen, please check your code (i.e... the BDT code), I "
               << " stop boosting here" << Endl;

   } else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." << Endl;

   Log() << kDEBUG << "BDT AdaBoost  wrong/all: " << sumGlobalwfalse << "/" << sumGlobalw
         << " 1-err/err=" << boostWeight << " log.." << TMath::Log(boostWeight) << Endl;
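   // Second pass: rescale each event's boost weight by the boost factor derived from
   // boostWeight = (1-err)/err, then renormalise so that the overall sum of weights is
   // preserved (see globalNormWeight below).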
   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

            if ( (*e)->GetWeight() > 0 ){
               (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);

               else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);

         if ( (*e)->GetWeight() > 0 ){
            (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);

            else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);

      newSumGlobalw+=(*e)->GetWeight();
      newSumw[(*e)->GetClass()] += (*e)->GetWeight();

   Log() << kDEBUG << "new Nsig=" << newSumw[0]*globalNormWeight << " new Nbkg=" << newSumw[1]*globalNormWeight << Endl;

   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      else                (*e)->ScaleBoostWeight( globalNormWeight );
   Double_t err=0, sumGlobalWeights=0, sumGlobalCost=0;

   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);
   std::map<Node*,Int_t> sigEventsInNode;

   for (vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      sumGlobalWeights += w;
      UInt_t iclass=(*e)->GetClass();

         Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
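         // AdaCost: each event contributes with a cost taken from a fixed 2x2 cost matrix,
         // selected by (true class, selected class): Css, Cts_sb, Ctb_ss, Cbb.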
         Bool_t   isSelectedSignal = (dtoutput>0);
         if (isTrueSignal) trueType = 1;

         if       (isTrueSignal  && isSelectedSignal)  cost=Css;
         else if  (isTrueSignal  && !isSelectedSignal) cost=Cts_sb;
         else if  (!isTrueSignal && isSelectedSignal)  cost=Ctb_ss;
         else if  (!isTrueSignal && !isSelectedSignal) cost=Cbb;
         else Log() << kERROR << "something went wrong in AdaCost" << Endl;

         sumGlobalCost+= w*trueType*dtoutput*cost;

      Log() << kFATAL << " AdaCost not implemented for regression" << Endl;

   sumGlobalCost /= sumGlobalWeights;

   vector<Double_t> newSumClassWeights(sumw.size(),0);

   for (vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      Bool_t   isSelectedSignal = (dtoutput>0);
      if (isTrueSignal) trueType = 1;

      if       (isTrueSignal  && isSelectedSignal)  cost=Css;
      else if  (isTrueSignal  && !isSelectedSignal) cost=Cts_sb;
      else if  (!isTrueSignal && isSelectedSignal)  cost=Ctb_ss;
      else if  (!isTrueSignal && !isSelectedSignal) cost=Cbb;
      else Log() << kERROR << "something went wrong in AdaCost" << Endl;

      if ( (*e)->GetWeight() > 0 ){
         (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);

      newSumGlobalWeights+=(*e)->GetWeight();
      newSumClassWeights[(*e)->GetClass()] += (*e)->GetWeight();

   Double_t globalNormWeight=Double_t(eventSample.size())/newSumGlobalWeights;
   Log() << kDEBUG << "new Nsig=" << newSumClassWeights[0]*globalNormWeight << " new Nbkg=" << newSumClassWeights[1]*globalNormWeight << Endl;

   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      else                (*e)->ScaleBoostWeight( globalNormWeight );
   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

   if ( !DoRegression() ) Log() << kFATAL << "Somehow you chose a regression boost method for a classification job" << Endl;
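   // AdaBoostR2: the per-event loss is the absolute deviation of the tree response from the
   // target, normalised to the largest deviation (maxDev) and combined linearly, quadratically
   // or exponentially into the error rate err; the boost weight is then err/(1-err).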
   Double_t err=0, sumw=0, sumwfalse=0, sumwfalse2=0;

   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      sumwfalse  += w * tmpDev;
      sumwfalse2 += w * tmpDev*tmpDev;
      if (tmpDev > maxDev) maxDev = tmpDev;

      err = sumwfalse/maxDev/sumw ;

      err = sumwfalse2/maxDev/maxDev/sumw ;

      for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

         err += w * (1 - exp (-tmpDev/maxDev)) / sumw;

      Log() << kFATAL << " you've chosen a Loss type for AdaBoost other than linear, quadratic or exponential "
            << "and this is not implemented... a typo in the options ??" << Endl;

         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               << "please check why this happens, maybe too many events per node requested ?"

         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. (" << err
               << ") That should not happen, but is possible for regression trees, and"
               << " should trigger a stop for the boosting. please check your code (i.e... the BDT code), I "
               << " stop boosting " << Endl;

   } else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." << Endl;

   Double_t boostWeight = err / (1.-err);
   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      results->GetHist("BoostWeights")->Fill(boostfactor);

      if ( (*e)->GetWeight() > 0 ){
         Float_t newBoostWeight = (*e)->GetBoostWeight() * boostfactor;
         Float_t newWeight = (*e)->GetWeight() * (*e)->GetBoostWeight() * boostfactor;
         if (newWeight == 0) {
            Log() << kINFO << "Weight=         " << (*e)->GetWeight() << Endl;
            Log() << kINFO << "BoostWeight=    " << (*e)->GetBoostWeight() << Endl;
            Log() << kINFO << "boostweight=" << boostWeight << "  err= " << err << Endl;
            Log() << kINFO << "NewBoostWeight= " << newBoostWeight << Endl;
            Log() << kINFO << "boostfactor=    " << boostfactor << Endl;

            Log() << kINFO << "target     =    " << (*e)->GetTarget(0) << Endl;

         (*e)->SetBoostWeight( newBoostWeight );

         (*e)->SetBoostWeight( (*e)->GetBoostWeight() / boostfactor);

      newSumw+=(*e)->GetWeight();

   Double_t normWeight = sumw / newSumw;
   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end(); e++) {

      (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );
      void* trxml = fForest[i]->AddXMLTo(wght);

   if (gTools().HasAttr( parent, Form("PreselectionLowBkgVar%d",0))) {

   if (gTools().HasAttr(parent, "TreeType")) {

      fForest.back()->SetTreeID(i++);

   Int_t analysisType(0);

   Log() << kINFO << "Read " << fNTrees << " Decision trees" << Endl;

      istr >> dummy >> iTree >> dummy >> boostWeight;

         fForest.back()->Print( std::cout );
         Log() << kFATAL << "Error while reading weight file; mismatch iTree="
               << iTree << " i=" << i
               << " dummy " << dummy
               << " boostweight " << boostWeight

   if (useNTrees > 0 ) nTrees = useNTrees;

   for (UInt_t itree=0; itree<nTrees; itree++) {
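   // GetMulticlassValues: each class owns every nClasses-th tree in the forest; the per-class
   // sums are converted to probabilities with a softmax, 1/(1 + sum_j exp(F_j - F_i)).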
   std::vector<double> temp;
   for (UInt_t iClass=0; iClass<nClasses; iClass++){
      temp.push_back(0.0);
      for (UInt_t itree = iClass; itree<fForest.size(); itree+=nClasses){

   for (UInt_t iClass=0; iClass<nClasses; iClass++){
      for (UInt_t j=0;j<nClasses;j++){
            norm+=exp(temp[j]-temp[iClass]);

      (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
      vector< Double_t > response(fForest.size());
      vector< Double_t > weight(fForest.size());

      std::vector< std::vector<Double_t> > vtemp;
      vtemp.push_back( response );
      vtemp.push_back( weight );
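      // GetRegressionValues: the regression response is a weighted median of the individual
      // tree responses, found by accumulating tree weights until half of the total sum of
      // weights is reached.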
      while (sumOfWeights <= totalSumOfWeights/2.) {
         sumOfWeights += vtemp[1][t];

   for (UInt_t i=0; i< relativeImportance.size(); i++) {

   if (ivar < (UInt_t)relativeImportance.size()) return relativeImportance[ivar];
   else Log() << kFATAL << "<GetVariableImportance> ivar = " << ivar << " is out of range " << Endl;
   Log() << "Boosted Decision Trees are a collection of individual decision" << Endl;
   Log() << "trees which form a multivariate classifier by (weighted) majority " << Endl;
   Log() << "vote of the individual trees. Consecutive decision trees are " << Endl;
   Log() << "trained using the original training data set with re-weighted " << Endl;
   Log() << "events. By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified in the previous tree a larger " << Endl;
   Log() << "weight in the training of the following tree." << Endl;

   Log() << "Decision trees are a sequence of binary splits of the data sample" << Endl;
   Log() << "using a single discriminant variable at a time. A test event " << Endl;
   Log() << "ending up after the sequence of left-right splits in a final " << Endl;
   Log() << "(\"leaf\") node is classified as either signal or background" << Endl;
   Log() << "depending on the majority type of training events in that node." << Endl;

   Log() << "By the nature of the binary splits performed on the individual" << Endl;
   Log() << "variables, decision trees do not deal well with linear correlations" << Endl;
   Log() << "between variables (they need to approximate the linear split in" << Endl;
   Log() << "the two dimensional space by a sequence of splits on the two " << Endl;
   Log() << "variables individually). Hence decorrelation could be useful " << Endl;
   Log() << "to optimise the BDT performance." << Endl;

   Log() << "The two most important parameters in the configuration are the " << Endl;
   Log() << "minimal number of events requested by a leaf node as percentage of the " << Endl;
   Log() << "   number of training events (option \"MinNodeSize\", replacing the actual number " << Endl;
   Log() << "   of events \"nEventsMin\" as given in earlier versions)." << Endl;
   Log() << "If this number is too large, detailed features " << Endl;
   Log() << "in the parameter space are hard to model. If it is too small, " << Endl;
   Log() << "the risk to overtrain rises and boosting seems to be less effective." << Endl;
   Log() << "  Typical values from our current experience for best performance " << Endl;
   Log() << "  are between 0.5(%) and 10(%) " << Endl;

   Log() << "The default minimal number is currently set to " << Endl;
   Log() << "   max(20, (N_training_events / N_variables^2 / 10)) " << Endl;
   Log() << "and can be changed by the user." << Endl;

   Log() << "The other crucial parameter, the pruning strength (\"PruneStrength\")," << Endl;
   Log() << "is also related to overtraining. It is a regularisation parameter " << Endl;
   Log() << "that is used when determining after the training which splits " << Endl;
   Log() << "are considered statistically insignificant and are removed. The" << Endl;
   Log() << "user is advised to carefully watch the BDT screen output for" << Endl;
   Log() << "the comparison between efficiencies obtained on the training and" << Endl;
   Log() << "the independent test sample. They should be equal within statistical" << Endl;
   Log() << "errors, in order to minimize statistical fluctuations in different samples." << Endl;
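   // Illustrative only (not part of this method): a typical booking of this classifier from a
   // TMVA::Factory might look like the following, with the option values chosen purely as examples:
   //   factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
   //      "NTrees=800:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:"
   //      "UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" );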
   fout << "   std::vector<"<<nodeName<<"*> fForest;       // i.e. root nodes of decision trees" << std::endl;
   fout << "   std::vector<double>                fBoostWeights; // the weights applied in the individual boosts" << std::endl;
   fout << "};" << std::endl << std::endl;
   fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   double myMVA = 0;" << std::endl;

            fout << "   if (inputValues["<<ivar<<"] < " << fLowBkgCut[ivar] << ") return -1;  // is background preselection cut" << std::endl;

            fout << "   if (inputValues["<<ivar<<"] < " << fLowSigCut[ivar] << ") return  1;  // is signal preselection cut" << std::endl;

            fout << "   if (inputValues["<<ivar<<"] > " << fHighBkgCut[ivar] << ")  return -1;  // is background preselection cut" << std::endl;

            fout << "   if (inputValues["<<ivar<<"] > " << fHighSigCut[ivar] << ")  return  1;  // is signal preselection cut" << std::endl;

      fout << "   double norm  = 0;" << std::endl;

   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++){" << std::endl;
   fout << "      "<<nodeName<<" *current = fForest[itree];" << std::endl;
   fout << "      while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
   fout << "         if (current->GoesRight(inputValues)) current=("<<nodeName<<"*)current->GetRight();" << std::endl;
   fout << "         else current=("<<nodeName<<"*)current->GetLeft();" << std::endl;
   fout << "      }" << std::endl;

      fout << "      myMVA += current->GetResponse();" << std::endl;

      if (fUseYesNoLeaf) fout << "      myMVA += fBoostWeights[itree] *  current->GetNodeType();" << std::endl;
      else               fout << "      myMVA += fBoostWeights[itree] *  current->GetPurity();" << std::endl;
      fout << "      norm  += fBoostWeights[itree];" << std::endl;

   fout << "   }" << std::endl;

      fout << "   return 2.0/(1.0+exp(-2.0*myMVA))-1.0;" << std::endl;

   else fout << "   return myMVA /= norm;" << std::endl;
   fout << "};" << std::endl << std::endl;
   fout << "void " << className << "::Initialize()" << std::endl;
   fout << "{" << std::endl;

      fout << "  // itree = " << itree << std::endl;
      fout << "  fBoostWeights.push_back(" << fBoostWeights[itree] << ");" << std::endl;
      fout << "  fForest.push_back( " << std::endl;

      fout << "   );" << std::endl;

   fout << "   return;" << std::endl;
   fout << "};" << std::endl;
   fout << " " << std::endl;
   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++) { " << std::endl;
   fout << "      delete fForest[itree]; " << std::endl;
   fout << "   }" << std::endl;
   fout << "}" << std::endl;
   fout << "#define NN new "<<nodeName << std::endl;
   fout << "   " << std::endl;
   fout << "#ifndef "<<nodeName<<"__def" << std::endl;
   fout << "#define "<<nodeName<<"__def" << std::endl;
   fout << "   " << std::endl;
   fout << "class "<<nodeName<<" {" << std::endl;
   fout << "   " << std::endl;
   fout << "public:" << std::endl;
   fout << "   " << std::endl;
   fout << "   // constructor of an essentially \"empty\" node floating in space" << std::endl;
   fout << "   "<<nodeName<<" ( "<<nodeName<<"* left,"<<nodeName<<"* right," << std::endl;

      fout << "                          int nFisherCoeff," << std::endl;

         fout << "                          double fisherCoeff"<<i<<"," << std::endl;

   fout << "                          int selector, double cutValue, bool cutType, " << std::endl;
   fout << "                          int nodeType, double purity, double response ) :" << std::endl;
   fout << "   fLeft         ( left         )," << std::endl;
   fout << "   fRight        ( right        )," << std::endl;
   if (fUseFisherCuts) fout << "   fNFisherCoeff ( nFisherCoeff )," << std::endl;
   fout << "   fSelector     ( selector     )," << std::endl;
   fout << "   fCutValue     ( cutValue     )," << std::endl;
   fout << "   fCutType      ( cutType      )," << std::endl;
   fout << "   fNodeType     ( nodeType     )," << std::endl;
   fout << "   fPurity       ( purity       )," << std::endl;
   fout << "   fResponse     ( response     ){" << std::endl;

         fout << "     fFisherCoeff.push_back(fisherCoeff"<<i<<");" << std::endl;

   fout << "   }" << std::endl << std::endl;
   fout << "   virtual ~"<<nodeName<<"();" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetRight( void )  {return fRight; };" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the left " << std::endl;
   fout << "   virtual bool GoesLeft ( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetLeft( void ) { return fLeft; };   " << std::endl << std::endl;
   fout << "   // return  S/(S+B) (purity) at this node (from  training)" << std::endl << std::endl;
   fout << "   double GetPurity( void ) const { return fPurity; } " << std::endl;
   fout << "   // return the node type" << std::endl;
   fout << "   int    GetNodeType( void ) const { return fNodeType; }" << std::endl;
   fout << "   double GetResponse(void) const {return fResponse;}" << std::endl << std::endl;
   fout << "private:" << std::endl << std::endl;
   fout << "   "<<nodeName<<"*   fLeft;     // pointer to the left daughter node" << std::endl;
   fout << "   "<<nodeName<<"*   fRight;    // pointer to the right daughter node" << std::endl;

      fout << "   int                     fNFisherCoeff; // =0 if this node doesn't use fisher, else =nvar+1 " << std::endl;
      fout << "   std::vector<double>     fFisherCoeff;  // the fisher coeff (offset at the last element)" << std::endl;

   fout << "   int                     fSelector; // index of variable used in node selection (decision tree)   " << std::endl;
   fout << "   double                  fCutValue; // cut value applied on this node to discriminate bkg against sig" << std::endl;
   fout << "   bool                    fCutType;  // true: if event variable > cutValue ==> signal , false otherwise" << std::endl;
   fout << "   int                     fNodeType; // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << std::endl;
   fout << "   double                  fPurity;   // Purity of node from training"<< std::endl;
   fout << "   double                  fResponse; // Regression response value of node" << std::endl;
   fout << "}; " << std::endl;
   fout << "   " << std::endl;
   fout << "//_______________________________________________________________________" << std::endl;
   fout << "   "<<nodeName<<"::~"<<nodeName<<"()" << std::endl;
   fout << "{" << std::endl;
   fout << "   if (fLeft  != NULL) delete fLeft;" << std::endl;
   fout << "   if (fRight != NULL) delete fRight;" << std::endl;
   fout << "}; " << std::endl;
   fout << "   " << std::endl;
   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesRight( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   bool result;" << std::endl;

      fout << "   if (fNFisherCoeff == 0){" << std::endl;
      fout << "     result = (inputValues[fSelector] > fCutValue );" << std::endl;
      fout << "   }else{" << std::endl;
      fout << "     double fisher = fFisherCoeff.at(fFisherCoeff.size()-1);" << std::endl;
      fout << "     for (unsigned int ivar=0; ivar<fFisherCoeff.size()-1; ivar++)" << std::endl;
      fout << "       fisher += fFisherCoeff.at(ivar)*inputValues.at(ivar);" << std::endl;
      fout << "     result = fisher > fCutValue;" << std::endl;
      fout << "   }" << std::endl;

      fout << "     result = (inputValues[fSelector] > fCutValue );" << std::endl;

   fout << "   if (fCutType == true) return result; //the cuts are selecting Signal ;" << std::endl;
   fout << "   else return !result;" << std::endl;
   fout << "}" << std::endl;
   fout << "   " << std::endl;
   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesLeft( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the left" << std::endl;
   fout << "   if (!this->GoesRight(inputValues)) return true;" << std::endl;
   fout << "   else return false;" << std::endl;
   fout << "}" << std::endl;
   fout << "   " << std::endl;
   fout << "#endif" << std::endl;
   fout << "   " << std::endl;
      Log() << kFATAL << "MakeClassInstantiateNode: started with undefined node" << Endl;

   fout << "NN(" << std::endl;

   fout << ", " << std::endl;

   fout << ", " << std::endl
        << std::setprecision(6);
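   // DeterminePreselectionCuts: sort the training events along each input variable, build the
   // cumulative signal and background weights, and scan for cut values below/above which only
   // one class remains (stored e.g. in fLowSigCut / fIsLowSigCut) as preselection cuts.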
   Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;

   std::vector<TMVA::BDTEventWrapper> bdtEventSample;

   for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {

         nTotS += (*it)->GetWeight();

         nTotB += (*it)->GetWeight();

      std::sort( bdtEventSample.begin(),bdtEventSample.end() );

      Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
      std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
      for( ; it != it_end; ++it ) {

            sigWeightCtr += (**it)->GetWeight();

            bkgWeightCtr += (**it)->GetWeight();

         it->SetCumulativeWeight(false,bkgWeightCtr);
         it->SetCumulativeWeight(true,sigWeightCtr);

      Double_t nSelS, nSelB, effS=0.05, effB=0.05, rejS=0.05, rejB=0.05;
      Double_t tmpEffS, tmpEffB, tmpRejS, tmpRejB;

      for (UInt_t iev = 1; iev < bdtEventSample.size(); iev++) {

         nSelS = bdtEventSample[iev].GetCumulativeWeight(true);
         nSelB = bdtEventSample[iev].GetCumulativeWeight(false);

         tmpEffS=nSelS/nTotS;
         tmpEffB=nSelB/nTotB;

         else if (nSelB==0     && tmpEffS>effS)  {effS=tmpEffS; fLowSigCut[ivar]  = bdtEventSample[iev].GetVal() - dVal; fIsLowSigCut[ivar]=kTRUE;}

   Log() << kDEBUG << " \tfound and suggest the following possible pre-selection cuts " << Endl;
   if (fDoPreselection) Log() << kDEBUG << "\tthe training will be done after these cuts... and GetMVA value returns +1, (-1) for a signal (bkg) event that passes these cuts" << Endl;
   else  Log() << kDEBUG << "\tas option DoPreselection was not used, these cuts however will not be performed, but the training will see the full sample" << Endl;
Types::EAnalysisType fAnalysisType
 
void Train(void)
BDT training. 
 
void PreProcessNegativeEventWeights()
o.k. 
 
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1. 
 
double dist(Rotation3D const &r1, Rotation3D const &r2)
 
void GetBaggedSubSample(std::vector< const TMVA::Event *> &)
fills fEventSample with fBaggedSampleFraction*NEvents random training events 
 
static long int sum(long int i)
 
virtual Double_t Fit(std::vector< LossFunctionEventInfo > &evs)=0
 
Random number generator class based on M. 
 
THist< 1, int, THistStatContent > TH1I
 
virtual Double_t PoissonD(Double_t mean)
Generates a random number according to a Poisson law. 
 
MsgLogger & Endl(MsgLogger &ml)
 
std::vector< Bool_t > fIsLowSigCut
 
Double_t RegBoost(std::vector< const TMVA::Event *> &, DecisionTree *dt)
a special boosting only for Regression ... 
 
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility 
 
std::map< const TMVA::Event *, LossFunctionEventInfo > fLossFunctionEventInfo
 
Bool_t fPairNegWeightsGlobal
 
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs. 
 
void SetUseNvars(Int_t n)
 
const Ranking * CreateRanking()
Compute ranking of input variables. 
 
virtual void Delete(Option_t *option="")
Delete this tree from memory or/and disk. 
 
Bool_t IsConstructedFromWeightFile() const
 
Double_t CheckEvent(const TMVA::Event *, Bool_t UseYesNoLeaf=kFALSE) const
the event e is put into the decision tree (starting at the root node) and the output is NodeType (sig...
 
void BDT(TString dataset, const TString &fin="TMVA.root")
 
TString & ReplaceAll(const TString &s1, const TString &s2)
 
virtual Int_t Fill()
Fill all branches. 
 
virtual void SetName(const char *name)
Set the name of the TNamed. 
 
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
 
std::vector< Bool_t > fIsHighSigCut
 
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
 
void DeclareOptions()
define the options (their key words) that can be set in the option string know options: nTrees number...
 
std::vector< Double_t > fVariableImportance
 
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number. 
 
void DeterminePreselectionCuts(const std::vector< const TMVA::Event *> &eventSample)
find useful preselection cuts that will be applied before and Decision Tree training. 
 
void MakeClassInstantiateNode(DecisionTreeNode *n, std::ostream &fout, const TString &className) const
recursively descends a tree and writes the node instance to the output streem 
 
Double_t fMinLinCorrForFisher
 
std::vector< const TMVA::Event * > fEventSample
 
Double_t Bagging()
call it boot-strapping, re-sampling or whatever you like, in the end it is nothing else but applying ...
 
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution: 
 
tomato 1-D histogram with a float per channel (see TH1 documentation)} 
 
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
 
Short_t Min(Short_t a, Short_t b)
 
void ToLower()
Change string to lower-case. 
 
virtual void SetYTitle(const char *title)
 
virtual void SetTitle(const char *title="")
Set graph title. 
 
Double_t AdaBoost(std::vector< const TMVA::Event *> &, DecisionTree *dt)
the AdaBoost implementation. 
 
UInt_t GetNClasses() const
 
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions" 
 
Int_t FloorNint(Double_t x)
 
void GetHelpMessage() const
Get help message text. 
 
Bool_t GetCutType(void) const
 
std::vector< Bool_t > fIsHighBkgCut
 
void SetShrinkage(Double_t s)
 
Double_t AdaCost(std::vector< const TMVA::Event *> &, DecisionTree *dt)
the AdaCost boosting algorithm takes a simple cost Matrix (currently fixed for all events...
 
void MakeClassSpecific(std::ostream &, const TString &) const
make ROOT-independent C++ class for classifier response (classifier-specific implementation) ...
 
TString GetElapsedTime(Bool_t Scientific=kTRUE)
 
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification ...
 
TString fRegressionLossFunctionBDTGS
 
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
 
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
 
const TString & GetInputLabel(Int_t i) const
 
std::vector< Double_t > fHighBkgCut
 
Double_t GetGradBoostMVA(const TMVA::Event *e, UInt_t nTrees)
returns MVA value: -1 for background, 1 for signal 
 
Double_t fBaggedSampleFraction
 
Bool_t fInverseBoostNegWeights
 
Double_t GradBoostRegression(std::vector< const TMVA::Event *> &, DecisionTree *dt)
Implementation of M_TreeBoost using any loss function as desribed by Friedman 1999. 
 
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters accoding to the argument 
 
void MakeClassSpecificHeader(std::ostream &, const TString &) const
specific class header 
 
Float_t GetCutValue(void) const
 
UInt_t GetTrainingTMVAVersionCode() const
 
const Event * GetEvent() const
 
Double_t fSigToBkgFraction
 
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
BDT can handle classification with multiple classes and regression with one regression-target. 
 
UInt_t GetNFisherCoeff() const
 
void Reset(void)
reset the method, as if it had just been instantiated (forget all training etc.) 
 
TString & Append(const char *cs)
 
void SetMinNodeSize(Double_t sizeInPercent)
 
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title. 
 
DataSetInfo & DataInfo() const
 
Bool_t DoRegression() const
 
Double_t AdaBoostR2(std::vector< const TMVA::Event *> &, DecisionTree *dt)
adaption of the AdaBoost to regression problems (see H.Drucker 1997) 
 
std::vector< Double_t > fHighSigCut
 
Long64_t GetNTrainingEvents() const
 
const std::vector< Float_t > & GetMulticlassValues()
get the multiclass MVA response for the BDT classifier 
 
virtual void Print(Option_t *option="") const
Print TNamed name and title. 
 
const Event * GetTrainingEvent(Long64_t ievt) const
 
Bool_t fNoNegWeightsInTraining
 
Bool_t DoMulticlass() const
 
const std::vector< Float_t > & GetRegressionValues()
get the regression value generated by the BDTs 
 
void InitEventSample()
initialize the event sample (i.e. reset the boost-weights... etc) 
 
std::vector< Bool_t > fIsLowBkgCut
 
void WriteMonitoringHistosToFile(void) const
Here we could write some histograms created during the processing to the output file. 
 
virtual void Delete(Option_t *option="")
Delete this object. 
 
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
 
const Event * GetTestingEvent(Long64_t ievt) const
 
virtual Double_t Determinant() const
 
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory. 
 
Float_t GetTarget(UInt_t itgt) const
 
Bool_t HasTrainingTree() const
 
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
retrieve (or create, if not yet existing) the Results object associated with the given tree type and analysis type ...
 
std::vector< Double_t > fLowBkgCut
 
Int_t GetNodeType(void) const
 
Double_t fNodePurityLimit
 
Service class for 2-Dim histogram classes. 
 
void SetBaggedSampleFraction(Double_t f)
 
const char * GetName() const
 
ClassInfo * GetClassInfo(Int_t clNum) const
 
std::map< TString, Double_t > optimize()
 
TGraph * GetGraph(const TString &alias) const
 
void BoostMonitor(Int_t iTree)
fills the ROC integral vs. tree index (iTree) from the test sample for the monitoring plots during the training ...
 
Double_t GetFisherCoeff(Int_t ivar) const
 
Bool_t fTrainWithNegWeights
 
Bool_t fSkipNormalization
 
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance (here apparently called resultsName i...
 
virtual ~MethodBDT(void)
destructor. Note: fEventSample and ValidationSample are already deleted at the end of TRAIN. When they ...
 
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content. See convention for numbering bins in TH1::GetBin. In case the bin number is greater th...
 
void SetNodePurityLimit(Double_t l)
 
Double_t PrivateGetMvaValue(const TMVA::Event *ev, Double_t *err=0, Double_t *errUpper=0, UInt_t useNTrees=0)
Return the MVA value (range [-1;1]) that classifies the event according to the majority vote from the...
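
A minimal sketch of such a boost-weighted majority vote (assumed form, plain C++, not the member function itself): each tree contributes +1 for a signal leaf and -1 for a background leaf, weighted by its boost weight, and the sum is normalised to the quoted [-1, 1] range.

   #include <vector>

   double MajorityVoteSketch(const std::vector<int>& treeDecision,    // +1 or -1 per tree
                             const std::vector<double>& boostWeight) {
      double sum = 0.0, norm = 0.0;
      for (size_t i = 0; i < treeDecision.size(); ++i) {
         sum  += boostWeight[i] * treeDecision[i];
         norm += boostWeight[i];
      }
      return norm > 0.0 ? sum / norm : 0.0;
   }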
 
Double_t GradBoost(std::vector< const TMVA::Event *> &, DecisionTree *dt, UInt_t cls=0)
Calculate the desired response value for each region. 
 
char * Form(const char *fmt,...)
 
const TString & GetMethodName() const
 
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value 
 
SeparationBase * fSepType
 
void Init(void)
common initialisation with defaults for the BDT-Method 
 
void ReadWeightsFromXML(void *parent)
reads the BDT from the xml file 
 
TMVA::DecisionTreeNode * GetEventNode(const TMVA::Event &e) const
get the pointer to the leaf node where a particular event ends up ...
 
virtual const char * GetPath() const
Returns the full path of the directory. 
 
Double_t TestTreeQuality(DecisionTree *dt)
test the tree quality in terms of misclassification 
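
A rough sketch of such a quality measure (hypothetical types, not the TMVA implementation): the weight fraction of validation events that end up on the wrong side of the tree.

   #include <vector>

   struct EvtSketch { double weight; bool isSignal; bool classifiedAsSignal; };

   double MisclassificationRate(const std::vector<EvtSketch>& validation) {
      double wrong = 0.0, total = 0.0;
      for (const EvtSketch& e : validation) {
         total += e.weight;
         if (e.isSignal != e.classifiedAsSignal) wrong += e.weight;  // wrong side of the tree
      }
      return total > 0.0 ? wrong / total : 0.0;
   }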
 
Long64_t GetNTestEvents() const
 
UInt_t GetNVariables() const
 
Float_t GetValue(UInt_t ivar) const
return value of i'th variable 
 
DecisionTree::EPruneMethod fPruneMethod
 
static void SetVarIndex(Int_t iVar)
 
Float_t GetPurity(void) const
 
Bool_t IgnoreEventsWithNegWeightsInTraining() const
 
void ReadWeightsFromStream(std::istream &istr)
read the weights (BDT coefficients) 
 
Double_t ApplyPreselectionCuts(const Event *ev)
apply the preselection cuts before even bothering about any decision trees in the GetMVA ...
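
An illustrative sketch only, using hypothetical stand-ins for the per-variable cut arrays listed on this page (fLowSigCut, fIsLowSigCut, ...): if a variable value falls into a region populated purely by signal (or purely by background), the event can be classified immediately, before any tree is evaluated.

   #include <vector>

   double PreselectionSketch(const std::vector<double>& vars,
                             const std::vector<bool>&   isLowSigCut,
                             const std::vector<double>& lowSigCut) {
      for (size_t ivar = 0; ivar < vars.size(); ++ivar) {
         // pure-signal region below the cut value -> return the signal response at once
         if (isLowSigCut[ivar] && vars[ivar] < lowSigCut[ivar]) return 1.0;
         // ... analogous checks for the lowBkg / highSig / highBkg regions
      }
      return 0.0;  // no preselection cut fired; fall through to the forest
   }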
 
void UpdateTargets(std::vector< const TMVA::Event *> &, UInt_t cls=0)
Calculate residuals for all events. 
 
std::vector< Float_t > * fMulticlassReturnVal
 
Bool_t IsNormalised() const
 
void SetMaxDepth(Int_t d)
 
TH1 * GetHist(const TString &alias) const
 
void AddWeightsXMLTo(void *parent) const
write weights to XML 
 
static DecisionTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML 
 
static RooMathCoreReg dummy
 
void SetAdaBoostBeta(Double_t b)
 
void SetCurrentType(Types::ETreeType type) const
 
std::vector< const TMVA::Event * > * fTrainSample
 
VariableInfo & GetVariableInfo(Int_t i)
 
void AddPreDefVal(const T &)
 
Double_t Boost(std::vector< const TMVA::Event *> &, DecisionTree *dt, UInt_t cls=0)
apply the boosting algorithm (the algorithm is selected via the "option" given in the constructor...
 
const TString & GetOptions() const
 
LossFunctionBDT * fRegressionLossFunctionBDTG
 
TMatrixTSym< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant. Notice that the LU decomposition is used instead of B...
 
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned. 
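
Possible usage (sketch only): "method" is assumed to be a TMVA::MethodBDT* obtained elsewhere (e.g. via the Factory); the figure of merit and fitter names are the defaults quoted in the signature above.

   #include "TMVA/MethodBDT.h"
   #include <iostream>
   #include <map>

   void TuneSketch(TMVA::MethodBDT* method) {
      // scan the main tuning parameters and adopt the optimal point
      std::map<TString, Double_t> best = method->OptimizeTuningParameters("ROCIntegral", "FitGA");
      method->SetTuneParameters(best);
      for (const auto& p : best)
         std::cout << p.first << " = " << p.second << std::endl;
   }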
 
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection. 
 
#define REGISTER_METHOD(CLASS)
for example 
 
TString fNegWeightTreatment
 
Abstract ClassifierFactory template that handles arbitrary types. 
 
virtual void SetXTitle(const char *title)
 
virtual void SetPoint(Int_t i, Double_t x, Double_t y)
Set x and y values for point number i. 
 
IPythonInteractive * fInteractive
 
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
 
Float_t GetResponse(void) const
 
virtual void AddRank(const Rank &rank)
Add a new rank; take ownership of it. 
 
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility; they are hence without any...
 
Short_t Max(Short_t a, Short_t b)
 
A Graph is a graphics object made of two arrays X and Y with npoints each. 
 
virtual DecisionTreeNode * GetLeft() const
 
std::vector< const TMVA::Event * > fValidationSample
 
std::vector< DecisionTree * > fForest
 
virtual DecisionTreeNode * GetRight() const
 
Bool_t IsSignal(const Event *ev) const
 
std::vector< Double_t > GetVariableImportance()
Return the relative variable importance, normalized to all variables together having the importance 1...
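
Possible usage (sketch): print the relative importance of each input variable after training; "bdt" is assumed to be a trained TMVA::MethodBDT* (the labels could alternatively be obtained via GetInputLabel, if accessible in the calling context).

   #include "TMVA/MethodBDT.h"
   #include <iostream>
   #include <vector>

   void PrintImportanceSketch(TMVA::MethodBDT* bdt) {
      std::vector<Double_t> imp = bdt->GetVariableImportance();  // normalised to sum to 1
      for (UInt_t i = 0; i < imp.size(); ++i)
         std::cout << "variable " << i << " : " << imp[i] << std::endl;
   }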
 
Double_t fFValidationEvents
 
std::vector< Double_t > fLowSigCut
 
std::vector< Float_t > * fRegressionReturnVal
 
Double_t Atof() const
Return floating-point value contained in string. 
 
void UpdateTargetsRegression(std::vector< const TMVA::Event *> &, Bool_t first=kFALSE)
Calculate current residuals for all events and update targets for next iteration. ...
 
Types::EAnalysisType GetAnalysisType() const
 
A TTree object has a header with a name and a title. 
 
Short_t GetSelector() const
 
std::map< const TMVA::Event *, std::vector< double > > fResiduals
 
void Store(TObject *obj, const char *alias=0)
 
static const Int_t fgDebugLevel
 
virtual void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)=0
 
Double_t Sqrt(Double_t x)
 
virtual void Set(Int_t n)
Set number of points in the graph. Existing coordinates are preserved. New coordinates above fNpoints a...
 
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
 
double norm(double *x, double *p)
 
std::vector< const TMVA::Event * > fSubSample
 
Int_t CeilNint(Double_t x)
 
virtual void SetTargets(std::vector< const TMVA::Event *> &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)=0
 
void InitGradBoost(std::vector< const TMVA::Event *> &)
initialize targets for first tree 
 
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
 
void SetSignalReferenceCut(Double_t cut)
 
virtual const char * GetTitle() const
Returns title of object. 
 
std::vector< double > fBoostWeights
 
MethodBDT(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
the standard constructor for the "boosted decision trees"
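
In practice this constructor is rarely called directly; the method is normally booked through the Factory. A typical booking might look as follows (sketch, ROOT 6 style with a DataLoader; "factory" and "dataloader" are assumed to exist, and the option values shown are illustrative, not recommendations):

   factory->BookMethod(dataloader, TMVA::Types::kBDT, "BDT",
                       "NTrees=800:MaxDepth=3:BoostType=Grad:Shrinkage=0.10:"
                       "UseBaggedBoost:BaggedSampleFraction=0.6:nCuts=20");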