#include <unordered_map>

   // initializer list (first constructor)
   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)

   // initializer list (second constructor)
   , fSigToBkgFraction(0)
   , fBaggedGradBoost(kFALSE)
   , fMinNodeSizeS("5%")
   , fMinLinCorrForFisher(.8)
   , fUseExclusiveVars(0)
   , fNodePurityLimit(0)
   , fFValidationEvents(0)
   , fRandomisedTrees(kFALSE)
   , fUsePoissonNvars(0)
   , fUseNTrainEvents(0)
   , fBaggedSampleFraction(0)
   , fNoNegWeightsInTraining(kFALSE)
   , fInverseBoostNegWeights(kFALSE)
   , fPairNegWeightsGlobal(kFALSE)
   , fTrainWithNegWeights(kFALSE)
   , fSkipNormalization(kFALSE)
 
   DeclareOptionRef(fNTrees, "NTrees", "Number of trees in the forest");
   if (DoRegression()) {
      DeclareOptionRef(fMaxDepth=50,"MaxDepth","Max depth of the decision tree allowed");
   } else {
      DeclareOptionRef(fMaxDepth=3,"MaxDepth","Max depth of the decision tree allowed");
   }

   TString tmp="5%"; if (DoRegression()) tmp="0.2%";
   DeclareOptionRef(fMinNodeSizeS=tmp, "MinNodeSize", "Minimum percentage of training events required in a leaf node (default: Classification: 5%, Regression: 0.2%)");

   DeclareOptionRef(fNCuts, "nCuts", "Number of grid points in variable range used in finding optimal cut in node splitting");

   DeclareOptionRef(fBoostType, "BoostType", "Boosting type for the trees in the forest (note: AdaCost is still experimental)");
   AddPreDefVal(TString("AdaBoost"));
   AddPreDefVal(TString("RealAdaBoost"));
   AddPreDefVal(TString("AdaCost"));
   AddPreDefVal(TString("Bagging"));
   AddPreDefVal(TString("AdaBoostR2"));
   if (DoRegression()) {
      fBoostType = "AdaBoostR2";
   } else {
      fBoostType = "AdaBoost";
   }
   DeclareOptionRef(fAdaBoostR2Loss="Quadratic", "AdaBoostR2Loss", "Type of Loss function in AdaBoostR2");
   AddPreDefVal(TString("Linear"));
   AddPreDefVal(TString("Quadratic"));
   AddPreDefVal(TString("Exponential"));

   DeclareOptionRef(fBaggedBoost=kFALSE, "UseBaggedBoost","Use only a random subsample of all events for growing the trees in each boost iteration.");
   DeclareOptionRef(fShrinkage = 1.0, "Shrinkage", "Learning rate for BoostType=Grad algorithm");
   DeclareOptionRef(fAdaBoostBeta=.5, "AdaBoostBeta", "Learning rate for AdaBoost algorithm");
   DeclareOptionRef(fRandomisedTrees,"UseRandomisedTrees","Determine at each node splitting the cut variable only as the best out of a random subset of variables (like in RandomForests)");
   DeclareOptionRef(fUseNvars,"UseNvars","Size of the subset of variables used with RandomisedTree option");
   DeclareOptionRef(fUsePoissonNvars,"UsePoissonNvars", "Interpret \"UseNvars\" not as fixed number but as mean of a Poisson distribution in each split with RandomisedTree option");
   DeclareOptionRef(fBaggedSampleFraction=.6,"BaggedSampleFraction","Relative size of bagged event sample to original size of the data sample (used whenever bagging is used, i.e. UseBaggedBoost or Bagging)");

   DeclareOptionRef(fUseYesNoLeaf=kTRUE, "UseYesNoLeaf",
                    "Use Sig or Bkg categories, or the purity=S/(S+B) as classification of the leaf node -> Real-AdaBoost");
   if (DoRegression()) {
      fUseYesNoLeaf = kFALSE;
   }

   DeclareOptionRef(fNegWeightTreatment="InverseBoostNegWeights","NegWeightTreatment","How to treat events with negative weights in the BDT training (in particular the boosting): IgnoreInTraining; Boost with inverse boost weight; Pair events with negative and positive weights in training sample and *annihilate* them (experimental!)");
   AddPreDefVal(TString("InverseBoostNegWeights"));
   AddPreDefVal(TString("IgnoreNegWeightsInTraining"));
   AddPreDefVal(TString("NoNegWeightsInTraining"));
   AddPreDefVal(TString("PairNegWeightsGlobal"));

   // cost parameters for the (experimental) AdaCost boosting
   DeclareOptionRef(fCss=1.,   "Css",   "AdaCost: cost of true signal selected signal");
   DeclareOptionRef(fCts_sb=1.,"Cts_sb","AdaCost: cost of true signal selected bkg");
   DeclareOptionRef(fCtb_ss=1.,"Ctb_ss","AdaCost: cost of true bkg selected signal");
   DeclareOptionRef(fCbb=1.,   "Cbb",   "AdaCost: cost of true bkg selected bkg");

   DeclareOptionRef(fNodePurityLimit=0.5, "NodePurityLimit", "In boosting/pruning, nodes with purity > NodePurityLimit are signal; background otherwise.");

   DeclareOptionRef(fSepTypeS, "SeparationType", "Separation criterion for node splitting");
   AddPreDefVal(TString("CrossEntropy"));
   AddPreDefVal(TString("GiniIndex"));
   AddPreDefVal(TString("GiniIndexWithLaplace"));
   AddPreDefVal(TString("MisClassificationError"));
   AddPreDefVal(TString("SDivSqrtSPlusB"));
   AddPreDefVal(TString("RegressionVariance"));
   if (DoRegression()) {
      fSepTypeS = "RegressionVariance";
   } else {
      fSepTypeS = "GiniIndex";
   }

   DeclareOptionRef(fRegressionLossFunctionBDTGS = "Huber", "RegressionLossFunctionBDTG", "Loss function for BDTG regression.");
   AddPreDefVal(TString("Huber"));
   AddPreDefVal(TString("AbsoluteDeviation"));
   AddPreDefVal(TString("LeastSquares"));

   DeclareOptionRef(fHuberQuantile = 0.7, "HuberQuantile", "In the Huber loss function this is the quantile that separates the core from the tails in the residuals distribution.");

   DeclareOptionRef(fDoBoostMonitor=kFALSE,"DoBoostMonitor","Create control plot with ROC integral vs tree number");

   DeclareOptionRef(fUseFisherCuts=kFALSE, "UseFisherCuts", "Use multivariate splits using the Fisher criterion");
   DeclareOptionRef(fMinLinCorrForFisher=.8,"MinLinCorrForFisher", "The minimum linear correlation between two variables demanded for use in Fisher criterion in node splitting");
   DeclareOptionRef(fUseExclusiveVars=kFALSE,"UseExclusiveVars","Variables already used in the Fisher criterion are not anymore analysed individually for node splitting");

   DeclareOptionRef(fDoPreselection=kFALSE,"DoPreselection","Apply automatic pre-selection for 100% efficient signal (bkg) cuts prior to training");

   DeclareOptionRef(fSigToBkgFraction=1,"SigToBkgFraction","Sig to Bkg ratio used in Training (similar to NodePurityLimit, which cannot be used in real adaboost)");

   DeclareOptionRef(fPruneMethodS, "PruneMethod", "Note: for BDTs use small trees (e.g. MaxDepth=3) and NoPruning. Pruning: method used for pruning (removal) of statistically insignificant branches");
   AddPreDefVal(TString("NoPruning"));
   AddPreDefVal(TString("ExpectedError"));
   AddPreDefVal(TString("CostComplexity"));

   DeclareOptionRef(fPruneStrength, "PruneStrength", "Pruning strength");
   DeclareOptionRef(fFValidationEvents=0.5, "PruningValFraction", "Fraction of events to use for optimizing automatic pruning.");

   DeclareOptionRef(fSkipNormalization=kFALSE, "SkipNormalization", "Skip normalization at initialization, to keep expectation value of BDT output according to the fraction of events");

   // deprecated options, kept for the moment (their values are translated in ProcessOptions)
   DeclareOptionRef(fMinNodeEvents=0, "nEventsMin", "deprecated: Use MinNodeSize (in % of training events) instead");
   DeclareOptionRef(fBaggedGradBoost=kFALSE, "UseBaggedGrad","deprecated: Use *UseBaggedBoost* instead: Use only a random subsample of all events for growing the trees in each iteration.");
   DeclareOptionRef(fBaggedSampleFraction, "GradBaggingFraction","deprecated: Use *BaggedSampleFraction* instead: Defines the fraction of events to be used in each iteration, e.g. when UseBaggedGrad=kTRUE.");
   DeclareOptionRef(fUseNTrainEvents,"UseNTrainEvents","deprecated: Use *BaggedSampleFraction* instead: Number of randomly picked training events used in randomised (and bagged) trees");
   DeclareOptionRef(fNNodesMax,"NNodesMax","deprecated: Use MaxDepth instead to limit the tree size");

   // historic options (all mapped onto fHistoricBool) kept so that old option strings remain readable
   DeclareOptionRef(fHistoricBool=kTRUE, "UseWeightedTrees",
                    "Use weighted trees or simple average in classification from the forest");
   DeclareOptionRef(fHistoricBool=kFALSE, "PruneBeforeBoost", "Flag to prune the tree before applying boosting algorithm");
   DeclareOptionRef(fHistoricBool=kFALSE,"RenormByClass","Individually re-normalize each event class to the original size after boosting");

   AddPreDefVal(TString("NegWeightTreatment"),TString("IgnoreNegWeights"));
 
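   // Illustrative usage (not part of this file): these option names are what a user passes in the
   // configuration string when booking a BDT; the names must match the strings declared above.
   // A typical classification booking might look like (sketch, assuming a Factory/DataLoader setup):
   //
   //   factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
   //                        "NTrees=800:MaxDepth=3:MinNodeSize=5%:nCuts=20:"
   //                        "BoostType=AdaBoost:AdaBoostBeta=0.5:"
   //                        "UseBaggedBoost:BaggedSampleFraction=0.6:SeparationType=GiniIndex" );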
   else if (fSepTypeS == "giniindex")              fSepType = new GiniIndex();
   else if (fSepTypeS == "crossentropy")           fSepType = new CrossEntropy();
   else if (fSepTypeS == "sdivsqrtsplusb")         fSepType = new SdivSqrtSplusB();
   else if (fSepTypeS == "regressionvariance")     fSepType = NULL;
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown Separation Index option " << fSepTypeS << " called" << Endl;
   }

   if(!(fHuberQuantile >= 0.0 && fHuberQuantile <= 1.0)){
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> Huber Quantile must be in range [0,1]. Value given, " << fHuberQuantile << ", does not match this criterion" << Endl;
   }

   fRegressionLossFunctionBDTGS.ToLower();
   if      (fRegressionLossFunctionBDTGS == "huber")                  fRegressionLossFunctionBDTG = new HuberLossFunctionBDT(fHuberQuantile);
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown Regression Loss Function BDT option " << fRegressionLossFunctionBDTGS << " called" << Endl;
   }

   fPruneMethodS.ToLower();

      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown PruneMethod " << fPruneMethodS << " option called" << Endl;

      Log() << kFATAL
            << "Sorry automatic pruning strength determination is not implemented yet for ExpectedErrorPruning" << Endl;

   if (fMinNodeEvents > 0){
      fMinNodeSize = Double_t(fMinNodeEvents*100.) / Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have explicitly set ** nEventsMin = " << fMinNodeEvents << " ** the min absolute number \n"
            << "of events in a leaf node. This is DEPRECATED, please use the option \n"
            << "*MinNodeSize* giving the relative number as percentage of training \n"
            << "events instead. \n"
            << "nEventsMin=" << fMinNodeEvents << "--> MinNodeSize=" << fMinNodeSize << "%"
            << Endl;
      Log() << kWARNING << "Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recommended \n"
            << " *MinNodeSize* = " << fMinNodeSizeS << " option !!" << Endl;
      fMinNodeSizeS = Form("%3.2f", fMinNodeSize);

      SetMinNodeSize(fMinNodeSizeS);
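   // The deprecated absolute cut nEventsMin is converted into the percentage used by MinNodeSize:
   //   MinNodeSize [%] = 100 * nEventsMin / N_training_events
   // e.g. nEventsMin=50 with 10000 training events corresponds to MinNodeSize=0.5%.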
 
   fAdaBoostR2Loss.ToLower();

   if (fBoostType=="Grad") {
      if (fNegWeightTreatment=="InverseBoostNegWeights"){
         Log() << kINFO << "the option NegWeightTreatment=InverseBoostNegWeights does"
               << " not exist for BoostType=Grad" << Endl;
         Log() << kINFO << "--> change to new default NegWeightTreatment=Pray" << Endl;
         Log() << kDEBUG << "i.e. simply keep them as is, which should work fine for Grad Boost" << Endl;
         fNegWeightTreatment="Pray";
         fNoNegWeightsInTraining=kFALSE;
      }
   } else if (fBoostType=="RealAdaBoost"){
      fBoostType    = "AdaBoost";
   } else if (fBoostType=="AdaCost"){

   if (fFValidationEvents < 0.0) fFValidationEvents = 0.0;
   if (fAutomatic && fFValidationEvents > 0.5) {
      Log() << kWARNING << "You have chosen to use more than half of your training sample "
            << "to optimize the automatic pruning algorithm. This is probably wasteful "
            << "and your overall results will be degraded. Are you sure you want this?"
            << Endl;
   }

   if (this->Data()->HasNegativeEventWeights()){
      Log() << kINFO << " You are using a Monte Carlo that has also negative weights. "
            << "That should in principle be fine as long as on average you end up with "
            << "something positive. For this you have to make sure that the minimal number "
            << "of (un-weighted) events demanded for a tree node (currently you use: MinNodeSize="
            << fMinNodeSizeS << "  (" << fMinNodeSize << "%)"
            << ", (or the deprecated equivalent nEventsMin) you can set this via the "
            << "BDT option string when booking the "
            << "classifier) is large enough to allow for reasonable averaging!!! "
            << " If this does not help.. maybe you want to try the option: IgnoreNegWeightsInTraining "
            << "which ignores events with negative weight in the training. " << Endl
            << Endl << "Note: You'll get a WARNING message during the training if that should ever happen" << Endl;
   }

   if (DoRegression()) {
      if (fUseYesNoLeaf && !IsConstructedFromWeightFile()){
         Log() << kWARNING << "Regression Trees do not work with fUseYesNoLeaf=TRUE --> I will set it to FALSE" << Endl;
      }

      if (fSepType != NULL){
         Log() << kWARNING << "Regression Trees do not work with Separation type other than <RegressionVariance> --> I will use it instead" << Endl;
      }

         Log() << kWARNING << "Sorry, UseFisherCuts is not available for regression analysis, I will ignore it!" << Endl;

         Log() << kWARNING << "Sorry, the option of nCuts<0 using a more elaborate node splitting algorithm " << Endl;
         Log() << kWARNING << "is not implemented for regression analysis ! " << Endl;
         Log() << kWARNING << "--> I switch to default nCuts = 20 and use standard node splitting" << Endl;
   }

   if (fRandomisedTrees){
      Log() << kINFO << " Randomised trees use no pruning" << Endl;
   }

   if (fUseFisherCuts) {
      Log() << kWARNING << "When using the option UseFisherCuts, the other option nCuts<0 (i.e. using" << Endl;
      Log() << " a more elaborate node splitting algorithm) is not implemented. " << Endl;
   }

      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
            << " I set it to 1 .. just so that the program does not crash"
            << Endl;
   fNegWeightTreatment.ToLower();
   if      (fNegWeightTreatment == "ignorenegweightsintraining")   fNoNegWeightsInTraining = kTRUE;
   else if (fNegWeightTreatment == "nonegweightsintraining")       fNoNegWeightsInTraining = kTRUE;
   else if (fNegWeightTreatment == "inverseboostnegweights")       fInverseBoostNegWeights = kTRUE;
   else if (fNegWeightTreatment == "pairnegweightsglobal")         fPairNegWeightsGlobal   = kTRUE;
   else if (fNegWeightTreatment == "pray")   Log() << kDEBUG << "Yes, good luck with praying " << Endl;
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<ProcessOptions> unknown option for treating negative event weights during training " << fNegWeightTreatment << " requested" << Endl;
   }

   if (fNegWeightTreatment == "pairnegweightsglobal")
      Log() << kWARNING << " you specified the option NegWeightTreatment=PairNegWeightsGlobal : This option is still considered EXPERIMENTAL !! " << Endl;

   // translation of the deprecated NNodesMax option into an equivalent MaxDepth
      while (tmp < fNNodesMax){

      Log() << kWARNING << "You have specified a deprecated option *NNodesMax=" << fNNodesMax
            << "* \n this has been translated to MaxDepth=" << fMaxDepth << Endl;

   if (fUseNTrainEvents>0){
      fBaggedSampleFraction  = (Double_t) fUseNTrainEvents/Data()->GetNTrainingEvents();
      Log() << kWARNING << "You have specified a deprecated option *UseNTrainEvents=" << fUseNTrainEvents
            << "* \n this has been translated to BaggedSampleFraction=" << fBaggedSampleFraction << "(%)" << Endl;
   }

   if (fBoostType=="Bagging") fBaggedBoost = kTRUE;
   if (fBaggedGradBoost){
      fBaggedBoost = kTRUE;
      Log() << kWARNING << "You have specified a deprecated option *UseBaggedGrad* --> please use *UseBaggedBoost* instead" << Endl;
   }
 
   if (sizeInPercent > 0 && sizeInPercent < 50){
      fMinNodeSize=sizeInPercent;
   }
   else {
      Log() << kFATAL << "you have demanded a minimal node size of "
            << sizeInPercent << "% of the training events.. \n"
            << " that somehow does not make sense " << Endl;
   }

   if (sizeInPercent.IsFloat()) SetMinNodeSize(sizeInPercent.Atof());
   else {
      Log() << kFATAL << "I had problems reading the option MinNodeEvents, which "
            << "after removing a possible % sign now reads " << sizeInPercent << Endl;
   }
 
      // defaults for classification
      fBoostType      = "AdaBoost";
      if(DataInfo().GetNClasses()!=0)

      // defaults for regression
      fBoostType      = "AdaBoostR2";
      fAdaBoostR2Loss = "Quadratic";
      if(DataInfo().GetNClasses()!=0)

   fPruneMethodS   = "NoPruning";

   fFValidationEvents = 0.5;
   fRandomisedTrees = kFALSE;

   fUsePoissonNvars = kTRUE;

   SetSignalReferenceCut( 0 );
 
   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];

   fBoostWeights.clear();
   if (fMonitorNtuple) { fMonitorNtuple->Delete(); fMonitorNtuple=NULL; }
   fVariableImportance.clear();
   fLossFunctionEventInfo.clear();

   Log() << kDEBUG << " successfully(?) reset the method " << Endl;

   for (UInt_t i=0; i<fForest.size(); i++) delete fForest[i];
 
   if (!HasTrainingTree()) Log() << kFATAL << "<Init> Data().TrainingTree() is zero pointer" << Endl;

   if (fEventSample.size() > 0) {
      // reset all previously stored boost weights of the event sample
      for (UInt_t iev=0; iev<fEventSample.size(); iev++) fEventSample[iev]->SetBoostWeight(1.);
   }
   else {
      UInt_t nevents = Data()->GetNTrainingEvents();

      std::vector<const TMVA::Event*> tmpEventSample;
      for (Long64_t ievt=0; ievt<nevents; ievt++) {
         Event* event = new Event( *GetTrainingEvent(ievt) );
         tmpEventSample.push_back(event);
      }

      if (!DoRegression()) DeterminePreselectionCuts(tmpEventSample);
      else fDoPreselection = kFALSE;

      for (UInt_t i=0; i<tmpEventSample.size(); i++) delete tmpEventSample[i];

      for (Long64_t ievt=0; ievt<nevents; ievt++) {
         Event* event = new Event( *GetTrainingEvent(ievt) );
         if (fDoPreselection){
            if (TMath::Abs(ApplyPreselectionCuts(event)) > 0.05) {

         if (event->GetWeight() < 0 && (IgnoreEventsWithNegWeightsInTraining() || fNoNegWeightsInTraining)){
            if (firstNegWeight) {
               Log() << kWARNING << " Note, you have events with negative event weight in the sample, but you've chosen to ignore them" << Endl;
            }
         } else if (event->GetWeight()==0){
            if (firstZeroWeight) {
               Log() << "Events with weight == 0 are going to be simply ignored " << Endl;
            }
         }

            if (event->GetWeight() < 0) {
               fTrainWithNegWeights=kTRUE;
                  if (fPairNegWeightsGlobal){
                     Log() << kWARNING << "Events with negative event weights are found and "
                           << " will be removed prior to the actual BDT training by global "
                           << " pairing (and subsequent annihilation) with positive weight events"
                           << Endl;
                  } else {
                     Log() << kWARNING << "Events with negative event weights are USED during "
                           << "the BDT training. This might cause problems with small node sizes "
                           << "or with the boosting. Please remove negative events from training "
                           << "using the option *IgnoreEventsWithNegWeightsInTraining* in case you "
                           << "observe problems with the boosting"
                           << Endl;
                  }
            }

               Double_t modulo = 1.0/(fFValidationEvents);
               Int_t   imodulo = static_cast<Int_t>( fmod(modulo,1.0) > 0.5 ? ceil(modulo) : floor(modulo) );
               if (ievt % imodulo == 0) fValidationSample.push_back( event );
               else                     fEventSample.push_back( event );

               fEventSample.push_back(event);
      }

         Log() << kINFO << "<InitEventSample> Internally I use " << fEventSample.size()
               << " for Training  and " << fValidationSample.size()
               << " for Pruning Validation (" << ((Float_t)fValidationSample.size())/((Float_t)fEventSample.size()+fValidationSample.size())*100.0
               << "% of training used for validation)" << Endl;

      if (fPairNegWeightsGlobal) PreProcessNegativeEventWeights();
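   // When automatic pruning is active, roughly a fraction fFValidationEvents (option
   // PruningValFraction) of the training events is diverted into fValidationSample:
   // every imodulo-th event goes to validation, with imodulo ~ 1/fFValidationEvents
   // (e.g. PruningValFraction=0.5 -> imodulo=2 -> every 2nd event used for pruning validation).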
 
   if (DoRegression()) {

   } else if (DoMulticlass()) {

   } else if (!fSkipNormalization) {

      Log() << kDEBUG << "\t<InitEventSample> For classification trees, " << Endl;
      Log() << kDEBUG << " \tthe effective number of backgrounds is scaled to match " << Endl;
      Log() << kDEBUG << " \tthe signal. Otherwise the first boosting step would do 'just that'!" << Endl;

      Double_t nevents = fEventSample.size();

      Int_t    sumSig=0, sumBkg=0;
      for (UInt_t ievt=0; ievt<fEventSample.size(); ievt++) {
         if ((DataInfo().IsSignal(fEventSample[ievt])) ) {
            sumSigW += fEventSample[ievt]->GetWeight();
         } else {
            sumBkgW += fEventSample[ievt]->GetWeight();
         }
      }

      if (sumSigW && sumBkgW){
         Double_t normSig = nevents/((1+fSigToBkgFraction)*sumSigW)*fSigToBkgFraction;
         Double_t normBkg = nevents/((1+fSigToBkgFraction)*sumBkgW);
         Log() << kDEBUG << "\tre-normalise events such that Sig and Bkg have respective sum of weights = "
               << fSigToBkgFraction << Endl;
         Log() << kDEBUG << "  \tsig->sig*" << normSig << "ev. bkg->bkg*" << normBkg << "ev." << Endl;
         Log() << kHEADER << "#events: (reweighted) sig: " << sumSigW*normSig << " bkg: " << sumBkgW*normBkg << Endl;
         Log() << kINFO << "#events: (unweighted) sig: " << sumSig << " bkg: " << sumBkg << Endl;
         for (Long64_t ievt=0; ievt<nevents; ievt++) {
            if ((DataInfo().IsSignal(fEventSample[ievt])) ) fEventSample[ievt]->SetBoostWeight(normSig);
            else                                            fEventSample[ievt]->SetBoostWeight(normBkg);
         }
      } else {
         Log() << kINFO << "--> could not determine scaling factors as either there are " << Endl;
         Log() << kINFO << " no signal events (sumSigW=" << sumSigW << ") or no bkg ev. (sumBkgW=" << sumBkgW << ")" << Endl;
      }
   }

   fTrainSample = &fEventSample;
   if (fBaggedBoost){
      GetBaggedSubSample(fEventSample);
      fTrainSample = &fSubSample;
   }
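   // With these factors the re-weighted sums become
   //   sumSigW*normSig = nevents * fSigToBkgFraction/(1+fSigToBkgFraction)
   //   sumBkgW*normBkg = nevents * 1/(1+fSigToBkgFraction)
   // i.e. signal and background are scaled to the requested ratio while the total
   // effective number of events stays at nevents.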
 
   std::vector<const Event*> negEvents;
   for (UInt_t iev = 0; iev < fEventSample.size(); iev++){
      if (fEventSample[iev]->GetWeight() < 0) {
         totalNegWeights += fEventSample[iev]->GetWeight();
         negEvents.push_back(fEventSample[iev]);
      } else {
         totalPosWeights += fEventSample[iev]->GetWeight();
      }
      totalWeights += fEventSample[iev]->GetWeight();
   }

   if (totalNegWeights == 0 ) {
      Log() << kINFO << "no negative event weights found .. no preprocessing necessary" << Endl;
   } else {
      Log() << kINFO << "found a total of " << totalNegWeights << " of negative event weights which I am going to try to pair with positive events to annihilate them" << Endl;
      Log() << kINFO << "found a total of " << totalPosWeights << " of events with positive weights" << Endl;
      Log() << kINFO << "--> total sum of weights = " << totalWeights << " = " << totalNegWeights+totalPosWeights << Endl;
   }

   for (Int_t i=0; i<2; i++){
      invCov = ((*cov)[i]);
         std::cout << "<MethodBDT::PreProcessNeg...> matrix is almost singular with determinant="
                   << " did you use the variables that are linear combinations or highly correlated?"
         std::cout << "<MethodBDT::PreProcessNeg...> matrix is singular with determinant="
                   << " did you use the variables that are linear combinations?"
   }

   Log() << kINFO << "Found a total of " << totalNegWeights << " in negative weights out of " << fEventSample.size() << " training events " << Endl;
   Timer timer(negEvents.size(),"Negative Event paired");
   for (UInt_t nev = 0; nev < negEvents.size(); nev++){
      Double_t weight = negEvents[nev]->GetWeight();
      UInt_t  iClassID = negEvents[nev]->GetClass();
      invCov = ((*cov)[iClassID]);

         for (UInt_t iev = 0; iev < fEventSample.size(); iev++){
            if (iClassID==fEventSample[iev]->GetClass() && fEventSample[iev]->GetWeight() > 0){
               for (UInt_t ivar=0; ivar < GetNvar(); ivar++){
                  for (UInt_t jvar=0; jvar<GetNvar(); jvar++){
                     dist += (negEvents[nev]->GetValue(ivar)-fEventSample[iev]->GetValue(ivar))*
                        (*invCov)[ivar][jvar]*
                        (negEvents[nev]->GetValue(jvar)-fEventSample[iev]->GetValue(jvar));
                  }
               }
               if (dist < minDist) { iMin=iev; minDist=dist;}
            }
         }

         if (iMin > -1) {
            Double_t newWeight = (negEvents[nev]->GetWeight() + fEventSample[iMin]->GetWeight());
            if (newWeight > 0){
               negEvents[nev]->SetBoostWeight( 0 );
               fEventSample[iMin]->SetBoostWeight( newWeight/fEventSample[iMin]->GetOriginalWeight() );
            } else {
               negEvents[nev]->SetBoostWeight( newWeight/negEvents[nev]->GetOriginalWeight() );
               fEventSample[iMin]->SetBoostWeight( 0 );
            }
         } else Log() << kFATAL << "preprocessing didn't find event to pair with the negative weight ... probably a bug" << Endl;
         weight = negEvents[nev]->GetWeight();
   }

   totalNegWeights = 0;
   totalPosWeights = 0;

   std::vector<const Event*> newEventSample;

   for (UInt_t iev = 0; iev < fEventSample.size(); iev++){
      if (fEventSample[iev]->GetWeight() < 0) {
         totalNegWeights += fEventSample[iev]->GetWeight();
         totalWeights    += fEventSample[iev]->GetWeight();
      } else {
         totalPosWeights += fEventSample[iev]->GetWeight();
         totalWeights    += fEventSample[iev]->GetWeight();
      }
      if (fEventSample[iev]->GetWeight() > 0) {
         newEventSample.push_back(new Event(*fEventSample[iev]));
         if (fEventSample[iev]->GetClass() == fSignalClass){
            sigWeight += fEventSample[iev]->GetWeight();
         } else {
            bkgWeight += fEventSample[iev]->GetWeight();
         }
      }
   }
   if (totalNegWeights < 0) Log() << kFATAL << " compensation of negative event weights with positive ones did not work " << totalNegWeights << Endl;

   for (UInt_t i=0; i<fEventSample.size(); i++) delete fEventSample[i];
   fEventSample = newEventSample;

   Log() << kINFO  << " after PreProcessing, the Event sample is left with " << fEventSample.size() << " events (unweighted), all with positive weights, adding up to " << totalWeights << Endl;
   Log() << kINFO  << " nSig=" << nSig << " sigWeight=" << sigWeight << " nBkg=" << nBkg << " bkgWeight=" << bkgWeight << Endl;
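   // Summary of the pairing scheme above: each negative-weight event is matched to the closest
   // positive-weight event of the same class, where "closest" means the Mahalanobis-like distance
   //   d^2 = (x_neg - x_pos)^T * invCov * (x_neg - x_pos)
   // built from the inverse covariance matrix of that class. The two weights are summed and
   // assigned to one of the partners (the other is set to zero); afterwards only events with
   // positive weight are copied into the new training sample.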
 
   std::map<TString,TMVA::Interval*> tuneParameters;
   std::map<TString,Double_t> tunedParameters;

   tuneParameters.insert(std::pair<TString,Interval*>("NTrees",         new Interval(10,1000,5)));
   tuneParameters.insert(std::pair<TString,Interval*>("MaxDepth",       new Interval(2,4,3)));
   tuneParameters.insert(std::pair<TString,Interval*>("MinNodeSize",    new LogInterval(1,30,30)));

   if        (fBoostType=="AdaBoost"){
      tuneParameters.insert(std::pair<TString,Interval*>("AdaBoostBeta",   new Interval(.2,1.,5)));
   } else if (fBoostType=="Grad"){
      tuneParameters.insert(std::pair<TString,Interval*>("Shrinkage",      new Interval(0.05,0.50,5)));
   } else if (fBoostType=="Bagging" && fRandomisedTrees){
      tuneParameters.insert(std::pair<TString,Interval*>("UseNvars",       new Interval(min_var,max_var,4)));
   }

   Log() << kINFO << " the following BDT parameters will be tuned on the respective *grid*\n" << Endl;
   std::map<TString,TMVA::Interval*>::iterator it;
   for(it=tuneParameters.begin(); it!= tuneParameters.end(); ++it){
      Log() << kWARNING << it->first << Endl;
      std::ostringstream oss;
      (it->second)->Print(oss);
   }

   tunedParameters=optimize.optimize();

   return tunedParameters;

   std::map<TString,Double_t>::iterator it;
   for(it=tuneParameters.begin(); it!= tuneParameters.end(); ++it){
      Log() << kWARNING << it->first << " = " << it->second << Endl;
      if      (it->first ==  "MaxDepth"       ) SetMaxDepth        ((Int_t)it->second);
      else if (it->first ==  "MinNodeSize"    ) SetMinNodeSize     (it->second);
      else if (it->first ==  "NTrees"         ) SetNTrees          ((Int_t)it->second);
      else if (it->first ==  "NodePurityLimit") SetNodePurityLimit (it->second);
      else if (it->first ==  "AdaBoostBeta"   ) SetAdaBoostBeta    (it->second);
      else if (it->first ==  "Shrinkage"      ) SetShrinkage       (it->second);
      else if (it->first ==  "UseNvars"       ) SetUseNvars        ((Int_t)it->second);
      else if (it->first ==  "BaggedSampleFraction" ) SetBaggedSampleFraction (it->second);
      else Log() << kFATAL << " SetParameter for " << it->first << " not yet implemented " << Endl;
   }
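   // Sketch of how this tuning is typically driven from user code (assuming a booked MethodBDT*
   // pointer called "method"; the signature follows the MethodBase convention):
   //
   //   std::map<TString,Double_t> best = method->OptimizeTuningParameters("ROCIntegral","FitGA");
   //
   // The loop above then applies the optimal grid point via the corresponding Set* calls.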
 
      Log() << kERROR << " Zero Decision Trees demanded... that does not work !! "
            << " I set it to 1 .. just so that the program does not crash"
            << Endl;

   if (fInteractive && fInteractive->NotInitialized()){
      std::vector<TString> titles = {"Boost weight", "Error Fraction"};
      fInteractive->Init(titles);
   }
   fIPyMaxIter = fNTrees;
   fExitFromTraining = false;

   if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with BDT; "
                             << "please remove the option from the configuration string, or "
                             << "use \"!Normalise\""
                             << Endl;

      Log() << kINFO << "Regression Loss Function: " << fRegressionLossFunctionBDTG->Name() << Endl;

   Log() << kINFO << "Training " << fNTrees << " Decision Trees ... patience please" << Endl;

   Log() << kDEBUG << "Training with maximal depth = " << fMaxDepth
         << ", MinNodeEvents=" << fMinNodeEvents
         << ", NTrees=" << fNTrees
         << ", NodePurityLimit=" << fNodePurityLimit
         << ", AdaBoostBeta=" << fAdaBoostBeta
         << Endl;

   TString hname = "AdaBoost weight distribution";

   if (DoRegression()) {
      hname="Boost event weights distribution";
   }

   TH1* nodesBeforePruningVsTree = new TH1I(Form("%s_NodesBeforePruning",DataInfo().GetName()),"nodes before pruning",fNTrees,0,fNTrees);
   TH1* nodesAfterPruningVsTree  = new TH1I(Form("%s_NodesAfterPruning",DataInfo().GetName()),"nodes after pruning",fNTrees,0,fNTrees);

   if(!DoMulticlass()){
      h->SetXTitle("boost weight");
      results->Store(h, "BoostWeights");

      // monitor the ROC integral as a function of the tree number, if requested
      if (fDoBoostMonitor){
         TH2* boostMonitor = new TH2F("BoostMonitor","ROC Integral Vs iTree",2,0,fNTrees,2,0,1.05);
         boostMonitor->SetYTitle("ROC Integral");
         results->Store(boostMonitor, "BoostMonitor");
         boostMonitorGraph->SetName("BoostMonitorGraph");
         boostMonitorGraph->SetTitle("ROCIntegralVsNTrees");
         results->Store(boostMonitorGraph, "BoostMonitorGraph");
      }

      h = new TH1F("BoostWeightVsTree","Boost weights vs tree",fNTrees,0,fNTrees);
      h->SetXTitle("#tree");
      h->SetYTitle("boost weight");
      results->Store(h, "BoostWeightsVsTree");

      h = new TH1F("ErrFractHist","error fraction vs tree number",fNTrees,0,fNTrees);
      h->SetXTitle("#tree");
      h->SetYTitle("error fraction");
      results->Store(h, "ErrorFrac");

      nodesBeforePruningVsTree->SetXTitle("#tree");
      nodesBeforePruningVsTree->SetYTitle("#tree nodes");
      results->Store(nodesBeforePruningVsTree);

      nodesAfterPruningVsTree->SetXTitle("#tree");
      nodesAfterPruningVsTree->SetYTitle("#tree nodes");
      results->Store(nodesAfterPruningVsTree);
   }

   fMonitorNtuple= new TTree("MonitorNtuple","BDT variables");
   fMonitorNtuple->Branch("iTree",&fITree,"iTree/I");
   fMonitorNtuple->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
   fMonitorNtuple->Branch("errorFraction",&fErrorFraction,"errorFraction/D");

   Int_t nNodesBeforePruningCount = 0;
   Int_t nNodesAfterPruningCount = 0;

   Int_t nNodesBeforePruning = 0;
   Int_t nNodesAfterPruning = 0;

   if(fBoostType=="Grad"){
      InitGradBoost(fEventSample);
   }

   while (itree < fNTrees && continueBoost){
      if (fExitFromTraining) break;
      fIPyCurrentIter = itree;

      if (DoMulticlass()) {
         if (fBoostType!="Grad"){
            Log() << kFATAL << "Multiclass is currently only supported by gradient boost. "
                  << "Please change boost option accordingly (BoostType=Grad)." << Endl;
         }

         UInt_t nClasses = DataInfo().GetNClasses();
         for (UInt_t i=0;i<nClasses;i++){
            // in multiclass gradient boost one tree per class is grown in each iteration
            fForest.push_back( new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), i,
                                                 fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
                                                 itree*nClasses+i, fNodePurityLimit, itree*nClasses+1));
            fForest.back()->SetNVars(GetNvar());
            if (fUseFisherCuts) {
               fForest.back()->SetUseFisherCuts();
               fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
               fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
            }

            nNodesBeforePruning = fForest.back()->BuildTree(*fTrainSample);
            Double_t bw = this->Boost(*fTrainSample, fForest.back(),i);
            if (bw > 0) {
               fBoostWeights.push_back(bw);
            } else {
               fBoostWeights.push_back(0);
               Log() << kWARNING << "stopped boosting at itree=" << itree << Endl;
            }
         }
      }
      else {
         DecisionTree* dt = new DecisionTree( fSepType, fMinNodeSize, fNCuts, &(DataInfo()), fSignalClass,
                                              fRandomisedTrees, fUseNvars, fUsePoissonNvars, fMaxDepth,
                                              itree, fNodePurityLimit, itree);

         fForest.push_back(dt);
         fForest.back()->SetNVars(GetNvar());
         if (fUseFisherCuts) {
            fForest.back()->SetUseFisherCuts();
            fForest.back()->SetMinLinCorrForFisher(fMinLinCorrForFisher);
            fForest.back()->SetUseExclusiveVars(fUseExclusiveVars);
         }

         nNodesBeforePruning = fForest.back()->BuildTree(*fTrainSample);

         if (fUseYesNoLeaf && !DoRegression() && fBoostType!="Grad") {
            nNodesBeforePruning = fForest.back()->CleanTree();
         }

         nNodesBeforePruningCount += nNodesBeforePruning;
         nodesBeforePruningVsTree->SetBinContent(itree+1,nNodesBeforePruning);

         fForest.back()->SetPruneMethod(fPruneMethod);
         fForest.back()->SetPruneStrength(fPruneStrength);

         std::vector<const Event*> * validationSample = NULL;
         if(fAutomatic) validationSample = &fValidationSample;

         Double_t bw = this->Boost(*fTrainSample, fForest.back());
         if (bw > 0) {
            fBoostWeights.push_back(bw);
         } else {
            fBoostWeights.push_back(0);
            Log() << kWARNING << "stopped boosting at itree=" << itree << Endl;
         }

         if (fUseYesNoLeaf && !DoRegression() && fBoostType!="Grad"){
            fForest.back()->CleanTree();
         }

         nNodesAfterPruning = fForest.back()->GetNNodes();
         nNodesAfterPruningCount += nNodesAfterPruning;
         nodesAfterPruningVsTree->SetBinContent(itree+1,nNodesAfterPruning);

           fInteractive->AddPoint(itree, fBoostWeight, fErrorFraction);

         fMonitorNtuple->Fill();
         if (fDoBoostMonitor){
            if (! DoRegression() ){
               if (  itree==fNTrees-1 ||  (!(itree%500)) ||
                     (!(itree%250) && itree <1000)||
                     (!(itree%100) && itree < 500)||
                     (!(itree%50)  && itree < 250)||
                     (!(itree%25)  && itree < 150)||
                     (!(itree%10)  && itree <  50)||
                     (!(itree%5)   && itree <  20)
                     ) BoostMonitor(itree);
            }
         }
      }
   }

      Log() << kDEBUG << "\t<Train> average number of nodes (w/o pruning) : "
            << nNodesBeforePruningCount/GetNTrees() << Endl;

      Log() << kDEBUG << "\t<Train> average number of nodes before/after pruning : "
            << nNodesBeforePruningCount/GetNTrees() << " / "
            << nNodesAfterPruningCount/GetNTrees()
            << Endl;

   Log() << kDEBUG << "Now I delete the private data sample" << Endl;
   for (UInt_t i=0; i<fEventSample.size();      i++) delete fEventSample[i];
   for (UInt_t i=0; i<fValidationSample.size(); i++) delete fValidationSample[i];
   fEventSample.clear();
   fValidationSample.clear();

   if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
 
   for (UInt_t itree=0; itree<nTrees; itree++) {

   return 2.0/(1.0+exp(-2.0*sum))-1;
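   // 2/(1+exp(-2*sum)) - 1 is just tanh(sum): the summed response of all gradient-boosted trees
   // is mapped from (-inf,inf) onto the interval (-1,1) for the classifier output.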
 
   if (DoMulticlass()) {
      UInt_t nClasses = DataInfo().GetNClasses();
      Bool_t isLastClass = (cls == nClasses - 1);

      std::map<const TMVA::Event *, std::vector<double>> & residuals = this->fResiduals;

      auto update_residuals = [&residuals, &lastTree, cls](const TMVA::Event * e) {

      auto update_residuals_last = [&residuals, &lastTree, cls, nClasses](const TMVA::Event * e) {

         auto &residualsThisEvent = residuals[e];

         std::vector<Double_t> expCache(nClasses, 0.0);
         std::transform(residualsThisEvent.begin(),
                        residualsThisEvent.begin() + nClasses,
                        expCache.begin(), [](Double_t d) { return exp(d); });

         Double_t exp_sum = std::accumulate(expCache.begin(),
                                            expCache.begin() + nClasses,
                                            0.0);

         for (UInt_t i = 0; i < nClasses; i++) {
            Double_t p_cls = expCache[i] / exp_sum;

            Double_t res = (e->GetClass() == i) ? (1.0 - p_cls) : (-p_cls);
         }
      };

                                 .Foreach(update_residuals_last, eventSample);

                                 .Foreach(update_residuals, eventSample);

      std::vector<Double_t> expCache;
      if (isLastClass) {
         expCache.resize(nClasses);
      }

      for (auto e : eventSample) {
         fResiduals[e].at(cls) += fForest.back()->CheckEvent(e, kFALSE);
         if (isLastClass) {
            auto &residualsThisEvent = fResiduals[e];
            std::transform(residualsThisEvent.begin(),
                           residualsThisEvent.begin() + nClasses,
                           expCache.begin(), [](Double_t d) { return exp(d); });

            Double_t exp_sum = std::accumulate(expCache.begin(),
                                               expCache.begin() + nClasses,
                                               0.0);

            for (UInt_t i = 0; i < nClasses; i++) {
               Double_t p_cls = expCache[i] / exp_sum;

               Double_t res = (e->GetClass() == i) ? (1.0 - p_cls) : (-p_cls);
            }
         }
      }
   }
   else { // binary classification
      std::map<const TMVA::Event *, std::vector<double>> & residuals = this->fResiduals;

      UInt_t signalClass = DataInfo().GetSignalClassIndex();

      auto update_residuals = [&residuals, &lastTree, signalClass](const TMVA::Event * e) {
         double & residualAt0 = residuals[e].at(0);

         Double_t p_sig = 1.0 / (1.0 + exp(-2.0 * residualAt0));
         Double_t res = ((e->GetClass() == signalClass) ? (1.0 - p_sig) : (-p_sig));
      };

                              .Foreach(update_residuals, eventSample);

      for (auto e : eventSample) {
         double & residualAt0 = residuals[e].at(0);

         Double_t p_sig = 1.0 / (1.0 + exp(-2.0 * residualAt0));
         Double_t res = ((e->GetClass() == signalClass) ? (1.0 - p_sig) : (-p_sig));
      }
   }

      auto f = [this, &nPartitions](UInt_t partition = 0) -> Int_t {
         Int_t start = 1.0 * partition / nPartitions * this->fEventSample.size();
         Int_t end = (partition + 1.0) / nPartitions * this->fEventSample.size();

         for (Int_t i = start; i < end; ++i) {
Int_t i = start; i < end; ++i) {
 
 1590   fRegressionLossFunctionBDTG->SetTargets(eventSample, fLossFunctionEventInfo);
 
 1604   std::unordered_map<TMVA::DecisionTreeNode*, LeafInfo> leaves;
 
 1605   for (
auto e : eventSample) {
 
 1608      auto &
v = leaves[node];
 
 1609      auto target = 
e->GetTarget(cls);
 
 1610      v.sumWeightTarget += target * weight;
 
 1611      v.sum2 += 
fabs(target) * (1.0 - 
fabs(target)) * weight;
 
 1613   for (
auto &iLeave : leaves) {
 
 1614      constexpr auto minValue = 1
e-30;
 
 1615      if (iLeave.second.sum2 < minValue) {
 
 1616         iLeave.second.sum2 = minValue;
 
 1618      const Double_t K = DataInfo().GetNClasses();
 
 1619      iLeave.first->SetResponse(fShrinkage * (
K - 1) / 
K * iLeave.second.sumWeightTarget / iLeave.second.sum2);
 
 1624   DoMulticlass() ? UpdateTargets(fEventSample, cls) : UpdateTargets(fEventSample);
 
 1636   std::map<TMVA::DecisionTreeNode*,vector< TMVA::LossFunctionEventInfo > > leaves;
 
 1637   for (std::vector<const TMVA::Event*>::const_iterator 
e=eventSample.begin(); 
e!=eventSample.end();++
e) {
 
 1639      (leaves[node]).push_back(fLossFunctionEventInfo[*
e]);
 
 1646   for (std::map<
TMVA::DecisionTreeNode*,vector< TMVA::LossFunctionEventInfo > >::iterator iLeave=leaves.begin();
 
 1647        iLeave!=leaves.end();++iLeave){
 
 1648      Double_t fit = fRegressionLossFunctionBDTG->Fit(iLeave->second);
 
 1649      (iLeave->first)->SetResponse(fShrinkage*fit);
 
 1652   UpdateTargetsRegression(*fTrainSample);
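   // Leaf responses set above follow the usual gradient-boost prescription:
   //  - multiclass (GradBoost): gamma_leaf = Shrinkage * (K-1)/K * sum_w(target) / sum_w(|t|(1-|t|)),
   //    i.e. a one-step Newton estimate for the K-class log-likelihood loss;
   //  - regression (GradBoostRegression): gamma_leaf = Shrinkage * Fit(leaf events), where Fit()
   //    is delegated to the configured RegressionLossFunctionBDTG (Huber, absolute deviation, least squares).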
 
      for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();++e) {

      fRegressionLossFunctionBDTG->Init(fLossFunctionEventInfo, fBoostWeights);
      UpdateTargetsRegression(*fTrainSample,kTRUE);

   else if(DoMulticlass()){
      UInt_t nClasses = DataInfo().GetNClasses();
      for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();++e) {
         for (UInt_t i=0;i<nClasses;i++){
            Double_t r = (*e)->GetClass()==i?(1-1.0/nClasses):(-1.0/nClasses);

            fResiduals[*e].push_back(0);
         }
      }
   }
   else {
      for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();++e) {
         Double_t r = (DataInfo().IsSignal(*e)?1:0)-0.5;

         fResiduals[*e].push_back(0);
      }
   }
 
   for (UInt_t ievt=0; ievt<fValidationSample.size(); ievt++) {
      Bool_t isSignalType = (dt->CheckEvent(fValidationSample[ievt]) > fNodePurityLimit ) ? 1 : 0;

      if (isSignalType == (DataInfo().IsSignal(fValidationSample[ievt])) ) {
         ncorrect += fValidationSample[ievt]->GetWeight();
      }
      else {
         nfalse += fValidationSample[ievt]->GetWeight();
      }
   }

   return ncorrect / (ncorrect + nfalse);
 
   if      (fBoostType=="AdaBoost")    returnVal = this->AdaBoost  (eventSample, dt);
   else if (fBoostType=="AdaCost")     returnVal = this->AdaCost   (eventSample, dt);
   else if (fBoostType=="Bagging")     returnVal = this->Bagging   ( );
   else if (fBoostType=="RegBoost")    returnVal = this->RegBoost  (eventSample, dt);
   else if (fBoostType=="AdaBoostR2")  returnVal = this->AdaBoostR2(eventSample, dt);
   else if (fBoostType=="Grad"){
      if (DoRegression())
         returnVal = this->GradBoostRegression(eventSample, dt);
      else if(DoMulticlass())
         returnVal = this->GradBoost (eventSample, dt, cls);
      else
         returnVal = this->GradBoost (eventSample, dt);
   }
   else {
      Log() << kINFO << GetOptions() << Endl;
      Log() << kFATAL << "<Boost> unknown boost option " << fBoostType << " called" << Endl;
   }

   if (fBaggedBoost){
      GetBaggedSubSample(fEventSample);
   }
 
   TH1F *tmpS = new TH1F( "tmpS", "", 100, -1., 1.00001 );
   TH1F *tmpB = new TH1F( "tmpB", "", 100, -1., 1.00001 );

   UInt_t signalClassNr = DataInfo().GetClassInfo("Signal")->GetNumber();

   UInt_t nevents = Data()->GetNTestEvents();
   for (UInt_t iev=0; iev < nevents; iev++){
      const Event* event = GetTestingEvent(iev);

      if (event->GetClass() == signalClassNr) {tmp=tmpS;}
      else                                    {tmp=tmpB;}
      tmp->Fill(PrivateGetMvaValue(event),event->GetWeight());
   }

   std::vector<TH1F*> hS;
   std::vector<TH1F*> hB;
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
      hS.push_back(new TH1F(Form("SigVar%dAtTree%d",ivar,iTree),Form("SigVar%dAtTree%d",ivar,iTree),100,DataInfo().GetVariableInfo(ivar).GetMin(),DataInfo().GetVariableInfo(ivar).GetMax()));
      hB.push_back(new TH1F(Form("BkgVar%dAtTree%d",ivar,iTree),Form("BkgVar%dAtTree%d",ivar,iTree),100,DataInfo().GetVariableInfo(ivar).GetMin(),DataInfo().GetVariableInfo(ivar).GetMax()));
      results->Store(hS.back(),hS.back()->GetTitle());
      results->Store(hB.back(),hB.back()->GetTitle());
   }

   for (UInt_t iev=0; iev < fEventSample.size(); iev++){
      if (fEventSample[iev]->GetBoostWeight() > max) max = 1.01*fEventSample[iev]->GetBoostWeight();
   }
   TH1F *tmpBoostWeightsS = new TH1F(Form("BoostWeightsInTreeS%d",iTree),Form("BoostWeightsInTreeS%d",iTree),100,0.,max);
   TH1F *tmpBoostWeightsB = new TH1F(Form("BoostWeightsInTreeB%d",iTree),Form("BoostWeightsInTreeB%d",iTree),100,0.,max);
   results->Store(tmpBoostWeightsS,tmpBoostWeightsS->GetTitle());
   results->Store(tmpBoostWeightsB,tmpBoostWeightsB->GetTitle());

   TH1F *tmpBoostWeights;
   std::vector<TH1F*> *h;

   for (UInt_t iev=0; iev < fEventSample.size(); iev++){
      if (fEventSample[iev]->GetClass() == signalClassNr) {
         tmpBoostWeights=tmpBoostWeightsS;
         h=&hS;
      } else {
         tmpBoostWeights=tmpBoostWeightsB;
         h=&hB;
      }
      tmpBoostWeights->Fill(fEventSample[iev]->GetBoostWeight());
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
         (*h)[ivar]->Fill(fEventSample[iev]->GetValue(ivar),fEventSample[iev]->GetWeight());
      }
   }
 
   Double_t err=0, sumGlobalw=0, sumGlobalwfalse=0, sumGlobalwfalse2=0;

   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);

   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();++e) {

      UInt_t iclass=(*e)->GetClass();

      if ( DoRegression() ) {
         sumGlobalwfalse  += w * tmpDev;
         sumGlobalwfalse2 += w * tmpDev*tmpDev;
         if (tmpDev > maxDev) maxDev = tmpDev;
      }

            if (!(isSignalType == DataInfo().IsSignal(*e))) {
               sumGlobalwfalse += w;
            }

            if (DataInfo().IsSignal(*e)) trueType = 1;

            sumGlobalwfalse += w*trueType*dtoutput;
   }

   err = sumGlobalwfalse/sumGlobalw;
   if ( DoRegression() ) {
      if (fAdaBoostR2Loss=="linear"){
         err = sumGlobalwfalse/maxDev/sumGlobalw;
      }
      else if (fAdaBoostR2Loss=="quadratic"){
         err = sumGlobalwfalse2/maxDev/maxDev/sumGlobalw;
      }
      else if (fAdaBoostR2Loss=="exponential"){
         for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();++e) {
            err += w * (1 - exp (-tmpDev/maxDev)) / sumGlobalw;
         }
      }
      else {
         Log() << kFATAL << " you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
               << " namely " << fAdaBoostR2Loss << "\n"
               << "and this is not implemented... a typo in the options ??" << Endl;
      }
   }

   Log() << kDEBUG << "BDT AdaBoost wrong/all: " << sumGlobalwfalse << "/" << sumGlobalw << Endl;

   std::vector<Double_t> newSumw(sumw.size(),0);

   if (err >= 0.5 && fUseYesNoLeaf) {
         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               << "please check why this happens, maybe too many events per node requested ?"
               << Endl;

         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. (" << err
               << ") That should not happen, please check your code (i.e... the BDT code), I "
               << " stop boosting here" << Endl;
   } else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." << Endl;
   }

      boostWeight = TMath::Log((1.-err)/err)*fAdaBoostBeta;

      boostWeight = TMath::Log((1.+err)/(1-err))*fAdaBoostBeta;

   Log() << kDEBUG << "BDT AdaBoost wrong/all: " << sumGlobalwfalse << "/" << sumGlobalw << " 1-err/err=" << boostWeight << " log.." << TMath::Log(boostWeight) << Endl;

   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();++e) {

      if (fUseYesNoLeaf||DoRegression()){
         if ((!( (dt->CheckEvent(*e,fUseYesNoLeaf) > fNodePurityLimit ) == DataInfo().IsSignal(*e))) || DoRegression()) {
            // misclassified event (or regression): enhance its weight by the boost factor
            if ( (*e)->GetWeight() > 0 ){
               (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
               if (DoRegression()) results->GetHist("BoostWeights")->Fill(boostfactor);
            } else {
               if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor); // boost is inverted for negative weights
               else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
            }
         }
      }
      else {
         if (DataInfo().IsSignal(*e)) trueType = 1;

         if ( (*e)->GetWeight() > 0 ){
            (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
            if (DoRegression()) results->GetHist("BoostWeights")->Fill(boostfactor);
         } else {
            if ( fInverseBoostNegWeights )(*e)->ScaleBoostWeight( 1. / boostfactor); // boost is inverted for negative weights
            else (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
         }
      }
      newSumGlobalw += (*e)->GetWeight();
      newSumw[(*e)->GetClass()] += (*e)->GetWeight();
   }

   Double_t globalNormWeight = Double_t(eventSample.size())/newSumGlobalw;
   Log() << kDEBUG << "new Nsig=" << newSumw[0]*globalNormWeight << " new Nbkg=" << newSumw[1]*globalNormWeight << Endl;

   for (std::vector<const TMVA::Event*>::const_iterator e=eventSample.begin(); e!=eventSample.end();++e) {
      if (DataInfo().IsSignal(*e))(*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );
      else                        (*e)->ScaleBoostWeight( globalNormWeight );
   }

   if (!(DoRegression()))results->GetHist("BoostWeights")->Fill(boostWeight);

   fBoostWeight = boostWeight;
   fErrorFraction = err;
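   // AdaBoost bookkeeping above: err is the weighted misclassification (or regression-loss)
   // fraction of the current tree, and the tree weight is
   //   boostWeight = AdaBoostBeta * ln((1-err)/err)       or
   //   boostWeight = AdaBoostBeta * ln((1+err)/(1-err))   (the latter for the Real-AdaBoost style output).
   // Misclassified events have their boost weight multiplied by the corresponding boost factor,
   // and all weights are then rescaled by globalNormWeight so the sum of weights stays equal to
   // the number of training events (with signal additionally scaled by SigToBkgFraction).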
 
   Double_t err=0, sumGlobalWeights=0, sumGlobalCost=0;
   // ...
   std::vector<Double_t> sumw(DataInfo().GetNClasses(),0);
   // ...
   for (vector<const TMVA::Event*>::const_iterator e = eventSample.begin(); e != eventSample.end(); ++e) {
      // ...
      sumGlobalWeights += w;
      UInt_t iclass = (*e)->GetClass();
      // ...
      if ( DoRegression() ) {
         Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
      }
      // ...
         Bool_t   isTrueSignal     = DataInfo().IsSignal(*e);
         Bool_t   isSelectedSignal = (dtoutput>0);
         if (isTrueSignal) trueType = 1;
         // ...
         if       (isTrueSignal  && isSelectedSignal)  cost=Css;
         else if  (isTrueSignal  && !isSelectedSignal) cost=Cts_sb;
         else if  (!isTrueSignal && isSelectedSignal)  cost=Ctb_ss;
         else if  (!isTrueSignal && !isSelectedSignal) cost=Cbb;
         else Log() << kERROR << "something went wrong in AdaCost" << Endl;
         // ...
         sumGlobalCost += w*trueType*dtoutput*cost;
   // ...
   if ( DoRegression() ) {
      Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
   }
   // ...
   sumGlobalCost /= sumGlobalWeights;
   // ...
   vector<Double_t> newSumClassWeights(sumw.size(),0);
   // ...
   Double_t boostWeight = TMath::Log((1+sumGlobalCost)/(1-sumGlobalCost)) * fAdaBoostBeta;
   // ...
   for (vector<const TMVA::Event*>::const_iterator e = eventSample.begin(); e != eventSample.end(); ++e) {
      // ...
      Bool_t   isTrueSignal     = DataInfo().IsSignal(*e);
      Bool_t   isSelectedSignal = (dtoutput>0);
      if (isTrueSignal) trueType = 1;
      // ...
      if       (isTrueSignal  && isSelectedSignal)  cost=Css;
      else if  (isTrueSignal  && !isSelectedSignal) cost=Cts_sb;
      else if  (!isTrueSignal && isSelectedSignal)  cost=Ctb_ss;
      else if  (!isTrueSignal && !isSelectedSignal) cost=Cbb;
      else Log() << kERROR << "something went wrong in AdaCost" << Endl;
      // ...
      if (DoRegression()) Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
      if ( (*e)->GetWeight() > 0 ){
         (*e)->SetBoostWeight( (*e)->GetBoostWeight() * boostfactor);
         // ...
         if (DoRegression()) Log() << kFATAL << " AdaCost not implemented for regression" << Endl;
      // ...
         if ( fInverseBoostNegWeights ) (*e)->ScaleBoostWeight( 1. / boostfactor);
      // ...
      newSumGlobalWeights += (*e)->GetWeight();
      newSumClassWeights[(*e)->GetClass()] += (*e)->GetWeight();
   // ...
   Double_t globalNormWeight = Double_t(eventSample.size())/newSumGlobalWeights;
   Log() << kDEBUG << "new Nsig=" << newSumClassWeights[0]*globalNormWeight
         << " new Nbkg=" << newSumClassWeights[1]*globalNormWeight << Endl;
   // ...
   for (std::vector<const TMVA::Event*>::const_iterator e = eventSample.begin(); e != eventSample.end(); ++e) {
      // ...
      if (DataInfo().IsSignal(*e)) (*e)->ScaleBoostWeight( globalNormWeight * fSigToBkgFraction );
      else                         (*e)->ScaleBoostWeight( globalNormWeight );
   }
   // ...
   if (!(DoRegression())) results->GetHist("BoostWeights")->Fill(boostWeight);
   // ...
   fBoostWeight   = boostWeight;
   fErrorFraction = err;
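   // Annotation (not part of the original source): AdaCost generalises AdaBoost by weighting each
   // event's contribution with a cost-matrix entry (Css, Cts_sb, Ctb_ss, Cbb, selected above from
   // the true class and the tree decision). With the normalised, cost-weighted agreement
   // r = sumGlobalCost / sumGlobalWeights, the tree weight is
   //    boostWeight = fAdaBoostBeta * ln((1+r)/(1-r)),
   // e.g. r = 0.2 and fAdaBoostBeta = 0.5 give boostWeight = 0.5*ln(1.2/0.8) ~= 0.20.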
 
   if (!fSubSample.empty()) fSubSample.clear();
   // ...
   for (std::vector<const TMVA::Event*>::const_iterator e = eventSample.begin(); e != eventSample.end(); ++e) {
      n = trandom->PoissonD(fBaggedSampleFraction);
      for (Int_t i=0; i<n; i++) fSubSample.push_back(*e);
   }
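   // Annotation (not part of the original source): this is Poisson bootstrap resampling. Each
   // training event is copied into fSubSample k times, with k drawn from a Poisson distribution
   // of mean fBaggedSampleFraction. With the default fraction of 0.6 an event enters the bagged
   // sample 0.6 times on average and is left out entirely with probability exp(-0.6) ~= 55%.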
 
   if ( !DoRegression() ) Log() << kFATAL << "Somehow you chose a regression boost method for a classification job" << Endl;
   // ...
   Double_t err=0, sumw=0, sumwfalse=0, sumwfalse2=0;
   // ...
   for (std::vector<const TMVA::Event*>::const_iterator e = eventSample.begin(); e != eventSample.end(); ++e) {
      // ...
      sumwfalse  += w * tmpDev;
      sumwfalse2 += w * tmpDev*tmpDev;
      if (tmpDev > maxDev) maxDev = tmpDev;
   }
   // ...
   if (fAdaBoostR2Loss=="linear"){
      err = sumwfalse/maxDev/sumw ;
   }
   else if (fAdaBoostR2Loss=="quadratic"){
      err = sumwfalse2/maxDev/maxDev/sumw ;
   }
   else if (fAdaBoostR2Loss=="exponential"){
      // ...
      for (std::vector<const TMVA::Event*>::const_iterator e = eventSample.begin(); e != eventSample.end(); ++e) {
         // ...
         err += w * (1 - exp(-tmpDev/maxDev)) / sumw;
      }
   }
   else {
      Log() << kFATAL << " you've chosen a Loss type for Adaboost other than linear, quadratic or exponential "
            << " namely " << fAdaBoostR2Loss << "\n"
            << "and this is not implemented... a typo in the options ??" << Endl;
   }
   // ...
         Log() << kERROR << " YOUR tree has only 1 Node... kind of a funny *tree*. I cannot "
               << "boost such a thing... if after 1 step the error rate is == 0.5"
               << "please check why this happens, maybe too many events per node requested ?"
         // ...
      // ...
         Log() << kERROR << " The error rate in the BDT boosting is > 0.5. ("<< err
               << ") That should not happen, but is possible for regression trees, and"
               << " should trigger a stop for the boosting. please check your code (i.e... the BDT code), I "
               << " stop boosting " << Endl;
   // ...
   } else if (err < 0) {
      Log() << kERROR << " The error rate in the BDT boosting is < 0. That can happen"
            << " due to improper treatment of negative weights in a Monte Carlo.. (if you have"
            << " an idea on how to do it in a better way, please let me know (Helge.Voss@cern.ch)"
            << " for the time being I set it to its absolute value.. just to continue.." << Endl;
   }
   // ...
   Double_t boostWeight = err / (1.-err);
   // ...
   for (std::vector<const TMVA::Event*>::const_iterator e = eventSample.begin(); e != eventSample.end(); ++e) {
      // ...
      results->GetHist("BoostWeights")->Fill(boostfactor);
      // ...
      if ( (*e)->GetWeight() > 0 ){
         Float_t newBoostWeight = (*e)->GetBoostWeight() * boostfactor;
         Float_t newWeight      = (*e)->GetWeight() * (*e)->GetBoostWeight() * boostfactor;
         if (newWeight == 0) {
            Log() << kINFO  << "Weight=    "      << (*e)->GetWeight()      << Endl;
            Log() << kINFO  << "BoostWeight= "    << (*e)->GetBoostWeight() << Endl;
            Log() << kINFO  << "boostweight="     << boostWeight << "  err= " << err << Endl;
            Log() << kINFO  << "NewBoostWeight= " << newBoostWeight << Endl;
            Log() << kINFO  << "boostfactor= "    << boostfactor << Endl;
            Log() << kINFO  << "maxDev     = "    << maxDev << Endl;
            // ...
            Log() << kINFO  << "target     = "    << (*e)->GetTarget(0) << Endl;
         }
         // ...
         (*e)->SetBoostWeight( newBoostWeight );
      } else {
         (*e)->SetBoostWeight( (*e)->GetBoostWeight() / boostfactor);
      }
      // ...
      newSumw += (*e)->GetWeight();
   }
   // ...
   Double_t normWeight = sumw / newSumw;
   for (std::vector<const TMVA::Event*>::const_iterator e = eventSample.begin(); e != eventSample.end(); ++e) {
      // ...
      (*e)->SetBoostWeight( (*e)->GetBoostWeight() * normWeight );
   }
   // ...
   fBoostWeight   = boostWeight;
   fErrorFraction = err;
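   // Annotation (not part of the original source): this is H. Drucker's AdaBoost.R2 (1997) as
   // implemented here. Each event's deviation tmpDev = |prediction - target| is mapped to a loss
   // in [0,1] via the chosen AdaBoostR2Loss (linear: tmpDev/maxDev, quadratic: (tmpDev/maxDev)^2,
   // exponential: 1 - exp(-tmpDev/maxDev)), and err is the weighted mean loss. The tree weight is
   // boostWeight = err/(1-err), e.g. err = 0.2 gives boostWeight = 0.25; the per-event boostfactor
   // applied above is computed in lines elided from this listing and follows the usual AdaBoost.R2
   // rule of increasing the relative weight of events with a larger loss.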
 
   if (fDoPreselection){
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
         gTools().AddAttr( wght, Form("PreselectionLowBkgVar%d",ivar),       fIsLowBkgCut[ivar]);
         gTools().AddAttr( wght, Form("PreselectionLowBkgVar%dValue",ivar),  fLowBkgCut[ivar]);
         gTools().AddAttr( wght, Form("PreselectionLowSigVar%d",ivar),       fIsLowSigCut[ivar]);
         gTools().AddAttr( wght, Form("PreselectionLowSigVar%dValue",ivar),  fLowSigCut[ivar]);
         gTools().AddAttr( wght, Form("PreselectionHighBkgVar%d",ivar),      fIsHighBkgCut[ivar]);
         gTools().AddAttr( wght, Form("PreselectionHighBkgVar%dValue",ivar), fHighBkgCut[ivar]);
         gTools().AddAttr( wght, Form("PreselectionHighSigVar%d",ivar),      fIsHighSigCut[ivar]);
         gTools().AddAttr( wght, Form("PreselectionHighSigVar%dValue",ivar), fHighSigCut[ivar]);
      }
   }
   // ...
   gTools().AddAttr( wght, "AnalysisType", fForest.back()->GetAnalysisType() );
   // ...
   for (UInt_t i=0; i< fForest.size(); i++) {
      void* trxml = fForest[i]->AddXMLTo(wght);
      // ...
   }
   // ...
   for (i=0; i<fForest.size(); i++) delete fForest[i];
   // ...
   fBoostWeights.clear();
 
   if (gTools().HasAttr( parent, Form("PreselectionLowBkgVar%d",0))) {
      fIsLowBkgCut.resize(GetNvar());
      fLowBkgCut.resize(GetNvar());
      fIsLowSigCut.resize(GetNvar());
      fLowSigCut.resize(GetNvar());
      fIsHighBkgCut.resize(GetNvar());
      fHighBkgCut.resize(GetNvar());
      fIsHighSigCut.resize(GetNvar());
      fHighSigCut.resize(GetNvar());
      // ...
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
         // ...
         fIsLowBkgCut[ivar]  = tmpBool;
         // ...
         fLowBkgCut[ivar]    = tmpDouble;
         // ...
         fIsLowSigCut[ivar]  = tmpBool;
         // ...
         fLowSigCut[ivar]    = tmpDouble;
         // ...
         fIsHighBkgCut[ivar] = tmpBool;
         // ...
         fHighBkgCut[ivar]   = tmpDouble;
         // ...
         fIsHighSigCut[ivar] = tmpBool;
         // ...
         fHighSigCut[ivar]   = tmpDouble;
      }
   }
   // ...
   if (gTools().HasAttr(parent, "TreeType")) {
      // ...
   }
   // ...
      fForest.back()->SetTreeID(i++);
      // ...
      fBoostWeights.push_back(boostWeight);
 
   Int_t analysisType(0);
   // ...
   istr >> dummy >> fNTrees;
   Log() << kINFO << "Read " << fNTrees << " Decision trees" << Endl;
   // ...
   for (UInt_t i=0;i<fForest.size();i++) delete fForest[i];
   // ...
   fBoostWeights.clear();
   // ...
   for (int i=0;i<fNTrees;i++) {
      istr >> dummy >> iTree >> dummy >> boostWeight;
      // ...
         fForest.back()->Print( std::cout );
         Log() << kFATAL << "Error while reading weight file; mismatch iTree="
               << iTree << " i=" << i
               << " dummy " << dummy
               << " boostweight " << boostWeight
         // ...
      // ...
      fForest.back()->SetTreeID(i);
      fForest.back()->Read(istr, GetTrainingTMVAVersionCode());
      fBoostWeights.push_back(boostWeight);
   }
 
   return this->GetMvaValue( err, errUpper, 0 );
   // ...
   const Event* ev = GetEvent();
   if (fDoPreselection) {
      Double_t val = ApplyPreselectionCuts(ev);
      // ...
   }
   // ...
   return PrivateGetMvaValue(ev, err, errUpper, useNTrees);
   // ...
   NoErrorCalc(err, errUpper);
   // ...
   UInt_t nTrees = fForest.size();
   // ...
   if (useNTrees > 0 ) nTrees = useNTrees;
   // ...
   if (fBoostType=="Grad") return GetGradBoostMVA(ev,nTrees);
   // ...
   for (UInt_t itree=0; itree<nTrees; itree++) {
      // ...
      myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(ev,fUseYesNoLeaf);
      norm  += fBoostWeights[itree];
   }
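   // Annotation (not part of the original source): the classification response is the
   // boost-weight-weighted average of the individual tree outputs,
   //    MVA = sum_i( alpha_i * h_i(x) ) / sum_i( alpha_i ),
   // where h_i is either the leaf type (+1 signal / -1 background, with UseYesNoLeaf) or the
   // leaf purity. For example, three trees with alpha = {0.8, 0.5, 0.3} voting {+1, -1, +1}
   // give MVA = (0.8 - 0.5 + 0.3) / 1.6 = 0.375.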
 
   if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
   fMulticlassReturnVal->clear();
   // ...
   UInt_t nClasses = DataInfo().GetNClasses();
   std::vector<Double_t> temp(nClasses);
   auto forestSize = fForest.size();
   // ...
   std::vector<TMVA::DecisionTree *> forest = fForest;
   auto get_output = [&e, &forest, &temp, forestSize, nClasses](UInt_t iClass) {
      for (UInt_t itree = iClass; itree < forestSize; itree += nClasses) {
         temp[iClass] += forest[itree]->CheckEvent(e, kFALSE);
      }
   };
   // ...
   for (UInt_t itree = 0; itree < forestSize; ++itree) {
      temp[classOfTree] += fForest[itree]->CheckEvent(e, kFALSE);
      if (++classOfTree == nClasses) classOfTree = 0;
   }
   // ...
   std::transform(temp.begin(), temp.end(), temp.begin(), [](Double_t d){return exp(d);});
   // ...
   Double_t exp_sum = std::accumulate(temp.begin(), temp.end(), 0.0);
   // ...
   for (UInt_t i = 0; i < nClasses; i++) {
      Double_t p_cls = temp[i] / exp_sum;
      // ...
      (*fMulticlassReturnVal).push_back(p_cls);
   }
   // ...
   return *fMulticlassReturnVal;
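   // Annotation (not part of the original source): for multiclass the forest is interleaved,
   // tree itree belonging to class (itree modulo nClasses), so each class k accumulates its own
   // score F_k. The scores are then turned into probabilities with a softmax,
   //    p_k = exp(F_k) / sum_j exp(F_j).
   // E.g. F = {1.0, 0.0, -1.0} gives p ~= {0.665, 0.245, 0.090}.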
 
   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
   fRegressionReturnVal->clear();
   // ...
   const Event * ev = GetEvent();
   // ...
   if (fBoostType=="AdaBoostR2") {
      // ...
      vector< Double_t > response(fForest.size());
      vector< Double_t > weight(fForest.size());
      // ...
      for (UInt_t itree=0; itree<fForest.size(); itree++) {
         response[itree]    = fForest[itree]->CheckEvent(ev,kFALSE);
         weight[itree]      = fBoostWeights[itree];
         totalSumOfWeights += fBoostWeights[itree];
      }
      // ...
      std::vector< std::vector<Double_t> > vtemp;
      vtemp.push_back( response );
      vtemp.push_back( weight );
      // ...
      while (sumOfWeights <= totalSumOfWeights/2.) {
         sumOfWeights += vtemp[1][t];
         // ...
      }
      // ...
   }
   else if(fBoostType=="Grad"){
      for (UInt_t itree=0; itree<fForest.size(); itree++) {
         myMVA += fForest[itree]->CheckEvent(ev,kFALSE);
      }
      // ...
      evT->SetTarget(0, myMVA+fBoostWeights[0] );
   }
   // ...
      for (UInt_t itree=0; itree<fForest.size(); itree++) {
         // ...
         myMVA += fBoostWeights[itree] * fForest[itree]->CheckEvent(ev,kFALSE);
         norm  += fBoostWeights[itree];
      }
   // ...
   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->push_back( evT2->GetTarget(0) );
   // ...
   return *fRegressionReturnVal;
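   // Annotation (not part of the original source): for AdaBoostR2 the regression output is the
   // weighted median of the individual tree responses: the responses are sorted and the response
   // at which the cumulative boost weight first exceeds half of the total weight is returned
   // (that is what the while loop over vtemp[1][t] above locates). For "Grad" the tree responses
   // are simply summed on top of the initial offset fBoostWeights[0]; otherwise a boost-weighted
   // mean is used. The result is finally mapped back to the original target scale via
   // InverseTransform.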
 
   Log() << kDEBUG << "\tWrite monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
   // ...
   fMonitorNtuple->Write();
 
   fVariableImportance.resize(GetNvar());
   for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
      fVariableImportance[ivar]=0;
   }
   // ...
   for (UInt_t itree = 0; itree < GetNTrees(); itree++) {
      std::vector<Double_t> relativeImportance(fForest[itree]->GetVariableImportance());
      for (UInt_t i=0; i< relativeImportance.size(); i++) {
         fVariableImportance[i] += fBoostWeights[itree] * relativeImportance[i];
      }
   }
   // ...
   for (UInt_t ivar=0; ivar< fVariableImportance.size(); ivar++){
      fVariableImportance[ivar] = TMath::Sqrt(fVariableImportance[ivar]);
      sum += fVariableImportance[ivar];
   }
   for (UInt_t ivar=0; ivar< fVariableImportance.size(); ivar++) fVariableImportance[ivar] /= sum;
   // ...
   return fVariableImportance;
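   // Annotation (not part of the original source): each variable's raw importance is the
   // boost-weight-weighted sum of its per-tree importances; the square root is then taken and
   // the values are normalised so that all variables add up to 1. For example, raw sums of
   // {4, 1} become {2, 1} after the square root and {0.667, 0.333} after normalisation.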
 
   std::vector<Double_t> relativeImportance = this->GetVariableImportance();
   if (ivar < (UInt_t)relativeImportance.size()) return relativeImportance[ivar];
   else Log() << kFATAL << "<GetVariableImportance> ivar = " << ivar << " is out of range " << Endl;
 
   vector< Double_t> importance(this->GetVariableImportance());
   // ...
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      // ...
      fRanking->AddRank( Rank( GetInputLabel(ivar), importance[ivar] ) );
   }
 
   Log() << "Boosted Decision Trees are a collection of individual decision" << Endl;
   Log() << "trees which form a multivariate classifier by (weighted) majority " << Endl;
   Log() << "vote of the individual trees. Consecutive decision trees are  " << Endl;
   Log() << "trained using the original training data set with re-weighted " << Endl;
   Log() << "events. By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified in the previous tree a larger " << Endl;
   Log() << "weight in the training of the following tree." << Endl;
   // ...
   Log() << "Decision trees are a sequence of binary splits of the data sample" << Endl;
   Log() << "using a single discriminant variable at a time. A test event " << Endl;
   Log() << "ending up after the sequence of left-right splits in a final " << Endl;
   Log() << "(\"leaf\") node is classified as either signal or background" << Endl;
   Log() << "depending on the majority type of training events in that node." << Endl;
   // ...
   Log() << "By the nature of the binary splits performed on the individual" << Endl;
   Log() << "variables, decision trees do not deal well with linear correlations" << Endl;
   Log() << "between variables (they need to approximate the linear split in" << Endl;
   Log() << "the two dimensional space by a sequence of splits on the two " << Endl;
   Log() << "variables individually). Hence decorrelation could be useful " << Endl;
   Log() << "to optimise the BDT performance." << Endl;
   // ...
   Log() << "The two most important parameters in the configuration are the  " << Endl;
   Log() << "minimal number of events requested by a leaf node as percentage of the " << Endl;
   Log() << "   number of training events (option \"MinNodeSize\"  replacing the actual number " << Endl;
   Log() << " of events \"nEventsMin\" as given in earlier versions" << Endl;
   Log() << "If this number is too large, detailed features " << Endl;
   Log() << "in the parameter space are hard to be modelled. If it is too small, " << Endl;
   Log() << "the risk to overtrain rises and boosting seems to be less effective" << Endl;
   Log() << "  typical values from our current experience for best performance  " << Endl;
   Log() << "  are between 0.5(%) and 10(%) " << Endl;
   // ...
   Log() << "The default minimal number is currently set to " << Endl;
   Log() << "   max(20, (N_training_events / N_variables^2 / 10)) " << Endl;
   Log() << "and can be changed by the user." << Endl;
   // ...
   Log() << "The other crucial parameter, the pruning strength (\"PruneStrength\")," << Endl;
   Log() << "is also related to overtraining. It is a regularisation parameter " << Endl;
   Log() << "that is used when determining after the training which splits " << Endl;
   Log() << "are considered statistically insignificant and are removed. The" << Endl;
   Log() << "user is advised to carefully watch the BDT screen output for" << Endl;
   Log() << "the comparison between efficiencies obtained on the training and" << Endl;
   Log() << "the independent test sample. They should be equal within statistical" << Endl;
   Log() << "errors, in order to minimize statistical fluctuations in different samples." << Endl;
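   // Annotation (not part of the original source): a typical way to set the parameters discussed
   // in this help text is the booking option string of the standard TMVA workflow, e.g.
   // (hypothetical values, using TMVA::Factory::BookMethod):
   //
   //    factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
   //                         "NTrees=800:MaxDepth=3:MinNodeSize=2.5%:"
   //                         "BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:"
   //                         "BaggedSampleFraction=0.6:nCuts=20" );
   //
   // The option names correspond to the DeclareOptions entries of this class.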
 
   fout << "   std::vector<"<<nodeName<<"*> fForest;       // i.e. root nodes of decision trees" << std::endl;
   fout << "   std::vector<double>                fBoostWeights; // the weights applied in the individual boosts" << std::endl;
   fout << "};" << std::endl << std::endl;
   fout << "double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   double myMVA = 0;" << std::endl;
   if (fDoPreselection){
      for (UInt_t ivar = 0; ivar< fIsLowBkgCut.size(); ivar++){
         if (fIsLowBkgCut[ivar]){
            fout << "   if (inputValues["<<ivar<<"] < " << fLowBkgCut[ivar] << ") return -1;  // is background preselection cut" << std::endl;
         }
         if (fIsLowSigCut[ivar]){
            fout << "   if (inputValues["<<ivar<<"] < "<< fLowSigCut[ivar] << ") return  1;  // is signal preselection cut" << std::endl;
         }
         if (fIsHighBkgCut[ivar]){
            fout << "   if (inputValues["<<ivar<<"] > "<<fHighBkgCut[ivar] <<")  return -1;  // is background preselection cut" << std::endl;
         }
         if (fIsHighSigCut[ivar]){
            fout << "   if (inputValues["<<ivar<<"] > "<<fHighSigCut[ivar]<<")  return  1;  // is signal preselection cut" << std::endl;
         }
      }
   }
   // ...
   if (fBoostType!="Grad"){
      fout << "   double norm  = 0;" << std::endl;
   }
   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++){" << std::endl;
   fout << "      "<<nodeName<<" *current = fForest[itree];" << std::endl;
   fout << "      while (current->GetNodeType() == 0) { //intermediate node" << std::endl;
   fout << "         if (current->GoesRight(inputValues)) current=("<<nodeName<<"*)current->GetRight();" << std::endl;
   fout << "         else current=("<<nodeName<<"*)current->GetLeft();" << std::endl;
   fout << "      }" << std::endl;
   if (fBoostType=="Grad"){
      fout << "      myMVA += current->GetResponse();" << std::endl;
   }
   else {
      if (fUseYesNoLeaf) fout << "      myMVA += fBoostWeights[itree] *  current->GetNodeType();" << std::endl;
      else               fout << "      myMVA += fBoostWeights[itree] *  current->GetPurity();" << std::endl;
      fout << "      norm  += fBoostWeights[itree];" << std::endl;
   }
   fout << "   }" << std::endl;
   if (fBoostType=="Grad"){
      fout << "   return 2.0/(1.0+exp(-2.0*myMVA))-1.0;" << std::endl;
   }
   else fout << "   return myMVA /= norm;" << std::endl;
   fout << "};" << std::endl << std::endl;
   fout << "void " << className << "::Initialize()" << std::endl;
   fout << "{" << std::endl;
   // ...
   for (UInt_t itree=0; itree<GetNTrees(); itree++) {
      fout << "  // itree = " << itree << std::endl;
      fout << "  fBoostWeights.push_back(" << fBoostWeights[itree] << ");" << std::endl;
      fout << "  fForest.push_back( " << std::endl;
      this->MakeClassInstantiateNode((DecisionTreeNode*)fForest[itree]->GetRoot(), fout, className);
      fout << "   );" << std::endl;
   }
   fout << "   return;" << std::endl;
   fout << "};" << std::endl;
   fout << " " << std::endl;
   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   for (unsigned int itree=0; itree<fForest.size(); itree++) { " << std::endl;
   fout << "      delete fForest[itree]; " << std::endl;
   fout << "   }" << std::endl;
   fout << "}" << std::endl;
 
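   // Annotation (not part of the original source): the streaming above produces the usual TMVA
   // standalone C++ class. A sketch of how such a generated class is typically used, assuming the
   // standard constructor taking the list of input-variable names and a class name "ReadBDT"
   // (both hypothetical here):
   //
   //    std::vector<std::string> vars;                 // hypothetical input-variable names
   //    vars.push_back("var1"); vars.push_back("var2");
   //    ReadBDT bdt(vars);
   //    std::vector<double> input(2); input[0] = 0.3; input[1] = -1.2;
   //    double response = bdt.GetMvaValue(input);
   //
   // GetMvaValue() internally calls the GetMvaValue__() generated here, which walks each tree
   // from its root to a leaf and combines the leaves as coded above.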
   fout << "#define NN new "<<nodeName << std::endl;
   // ...
   fout << "   " << std::endl;
   fout << "#ifndef "<<nodeName<<"__def" << std::endl;
   fout << "#define "<<nodeName<<"__def" << std::endl;
   fout << "   " << std::endl;
   fout << "class "<<nodeName<<" {" << std::endl;
   fout << "   " << std::endl;
   fout << "public:" << std::endl;
   fout << "   " << std::endl;
   fout << "   // constructor of an essentially \"empty\" node floating in space" << std::endl;
   fout << "   "<<nodeName<<" ( "<<nodeName<<"* left,"<<nodeName<<"* right," << std::endl;
   if (fUseFisherCuts){
      fout << "                          int nFisherCoeff," << std::endl;
      for (UInt_t i=0;i<GetNVariables()+1;i++){
         fout << "                          double fisherCoeff"<<i<<"," << std::endl;
      }
   }
   fout << "                          int selector, double cutValue, bool cutType, " << std::endl;
   fout << "                          int nodeType, double purity, double response ) :" << std::endl;
   fout << "   fLeft         ( left         )," << std::endl;
   fout << "   fRight        ( right        )," << std::endl;
   if (fUseFisherCuts) fout << "   fNFisherCoeff ( nFisherCoeff )," << std::endl;
   fout << "   fSelector     ( selector     )," << std::endl;
   fout << "   fCutValue     ( cutValue     )," << std::endl;
   fout << "   fCutType      ( cutType      )," << std::endl;
   fout << "   fNodeType     ( nodeType     )," << std::endl;
   fout << "   fPurity       ( purity       )," << std::endl;
   fout << "   fResponse     ( response     ){" << std::endl;
   if (fUseFisherCuts){
      for (UInt_t i=0;i<GetNVariables()+1;i++){
         fout << "     fFisherCoeff.push_back(fisherCoeff"<<i<<");" << std::endl;
      }
   }
   fout << "   }" << std::endl << std::endl;
   fout << "   virtual ~"<<nodeName<<"();" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   virtual bool GoesRight( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetRight( void )  {return fRight; };" << std::endl << std::endl;
   fout << "   // test event if it descends the tree at this node to the left " << std::endl;
   fout << "   virtual bool GoesLeft ( const std::vector<double>& inputValues ) const;" << std::endl;
   fout << "   "<<nodeName<<"* GetLeft( void ) { return fLeft; };   " << std::endl << std::endl;
   fout << "   // return  S/(S+B) (purity) at this node (from  training)" << std::endl << std::endl;
   fout << "   double GetPurity( void ) const { return fPurity; } " << std::endl;
   fout << "   // return the node type" << std::endl;
   fout << "   int    GetNodeType( void ) const { return fNodeType; }" << std::endl;
   fout << "   double GetResponse(void) const {return fResponse;}" << std::endl << std::endl;
   fout << "private:" << std::endl << std::endl;
   fout << "   "<<nodeName<<"*   fLeft;     // pointer to the left daughter node" << std::endl;
   fout << "   "<<nodeName<<"*   fRight;    // pointer to the right daughter node" << std::endl;
   if (fUseFisherCuts){
      fout << "   int                     fNFisherCoeff; // =0 if this node doesn't use fisher, else =nvar+1 " << std::endl;
      fout << "   std::vector<double>     fFisherCoeff;  // the fisher coeff (offset at the last element)" << std::endl;
   }
   fout << "   int                     fSelector; // index of variable used in node selection (decision tree)   " << std::endl;
   fout << "   double                  fCutValue; // cut value applied on this node to discriminate bkg against sig" << std::endl;
   fout << "   bool                    fCutType;  // true: if event variable > cutValue ==> signal , false otherwise" << std::endl;
   fout << "   int                     fNodeType; // Type of node: -1 == Bkg-leaf, 1 == Signal-leaf, 0 = internal " << std::endl;
   fout << "   double                  fPurity;   // Purity of node from training"<< std::endl;
   fout << "   double                  fResponse; // Regression response value of node" << std::endl;
   fout << "}; " << std::endl;
   fout << "   " << std::endl;
   fout << "//_______________________________________________________________________" << std::endl;
   fout << "   "<<nodeName<<"::~"<<nodeName<<"()" << std::endl;
   fout << "{" << std::endl;
   fout << "   if (fLeft  != NULL) delete fLeft;" << std::endl;
   fout << "   if (fRight != NULL) delete fRight;" << std::endl;
   fout << "}; " << std::endl;
   fout << "   " << std::endl;
   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesRight( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the right" << std::endl;
   fout << "   bool result;" << std::endl;
   if (fUseFisherCuts){
      fout << "   if (fNFisherCoeff == 0){" << std::endl;
      fout << "     result = (inputValues[fSelector] > fCutValue );" << std::endl;
      fout << "   }else{" << std::endl;
      fout << "     double fisher = fFisherCoeff.at(fFisherCoeff.size()-1);" << std::endl;
      fout << "     for (unsigned int ivar=0; ivar<fFisherCoeff.size()-1; ivar++)" << std::endl;
      fout << "       fisher += fFisherCoeff.at(ivar)*inputValues.at(ivar);" << std::endl;
      fout << "     result = fisher > fCutValue;" << std::endl;
      fout << "   }" << std::endl;
   }
   else {
      fout << "     result = (inputValues[fSelector] > fCutValue );" << std::endl;
   }
   fout << "   if (fCutType == true) return result; //the cuts are selecting Signal ;" << std::endl;
   fout << "   else return !result;" << std::endl;
   fout << "}" << std::endl;
   fout << "   " << std::endl;
   fout << "//_______________________________________________________________________" << std::endl;
   fout << "bool "<<nodeName<<"::GoesLeft( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   // test event if it descends the tree at this node to the left" << std::endl;
   fout << "   if (!this->GoesRight(inputValues)) return true;" << std::endl;
   fout << "   else return false;" << std::endl;
   fout << "}" << std::endl;
   fout << "   " << std::endl;
   fout << "#endif" << std::endl;
   fout << "   " << std::endl;
 
      Log() << kFATAL << "MakeClassInstantiateNode: started with undefined node" << Endl;
   // ...
   fout << "NN("<<std::endl;
   if (n->GetLeft() != NULL){
      this->MakeClassInstantiateNode( (DecisionTreeNode*)n->GetLeft() , fout, className);
   }
   // ...
   fout << ", " <<std::endl;
   if (n->GetRight() != NULL){
      this->MakeClassInstantiateNode( (DecisionTreeNode*)n->GetRight(), fout, className );
   }
   // ...
   fout << ", " << std::endl
        << std::setprecision(6);
   if (fUseFisherCuts){
      fout << n->GetNFisherCoeff() << ", ";
      for (UInt_t i=0; i< GetNVariables()+1; i++) {
         if (n->GetNFisherCoeff() == 0 ){
            // ...
         }
         // ...
            fout << n->GetFisherCoeff(i) << ", ";
      }
   }
   fout << n->GetSelector()  << ", "
        << n->GetCutValue()  << ", "
        << n->GetCutType()   << ", "
        << n->GetNodeType()  << ", "
        << n->GetPurity()    << ","
        << n->GetResponse()  << ") ";
 
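   // Annotation (not part of the original source): this routine serialises a tree by recursion.
   // Because of the "#define NN new <nodeName>" emitted in the header above, each node becomes a
   // nested constructor call in the generated file, roughly of the form
   //    NN( NN( <left subtree> ), NN( <right subtree> ), selector, cutValue, cutType,
   //        nodeType, purity, response )
   // with missing daughters handled in the else branches elided from this listing.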
   Int_t nTotS_unWeighted = 0, nTotB_unWeighted = 0;
   // ...
   std::vector<TMVA::BDTEventWrapper> bdtEventSample;
   // ...
   fIsLowSigCut.assign(GetNvar(),kFALSE);
   fIsLowBkgCut.assign(GetNvar(),kFALSE);
   fIsHighSigCut.assign(GetNvar(),kFALSE);
   fIsHighBkgCut.assign(GetNvar(),kFALSE);
   // ...
   fLowSigCut.assign(GetNvar(),0.);
   fLowBkgCut.assign(GetNvar(),0.);
   fHighSigCut.assign(GetNvar(),0.);
   fHighBkgCut.assign(GetNvar(),0.);
   // ...
   for( std::vector<const TMVA::Event*>::const_iterator it = eventSample.begin(); it != eventSample.end(); ++it ) {
      if (DataInfo().IsSignal(*it)){
         nTotS += (*it)->GetWeight();
         // ...
      }
      else {
         nTotB += (*it)->GetWeight();
         // ...
      }
      // ...
   }
   // ...
   for( UInt_t ivar = 0; ivar < GetNvar(); ivar++ ) {
      // ...
      std::sort( bdtEventSample.begin(),bdtEventSample.end() );
      // ...
      Double_t bkgWeightCtr = 0.0, sigWeightCtr = 0.0;
      // ...
      std::vector<TMVA::BDTEventWrapper>::iterator it = bdtEventSample.begin(), it_end = bdtEventSample.end();
      for( ; it != it_end; ++it ) {
         if (DataInfo().IsSignal(**it))
            sigWeightCtr += (**it)->GetWeight();
         else
            bkgWeightCtr += (**it)->GetWeight();
         // ...
         it->SetCumulativeWeight(false,bkgWeightCtr);
         it->SetCumulativeWeight(true,sigWeightCtr);
      }
      // ...
      Double_t dVal = (DataInfo().GetVariableInfo(ivar).GetMax() - DataInfo().GetVariableInfo(ivar).GetMin())/100. ;
      Double_t nSelS, nSelB, effS=0.05, effB=0.05, rejS=0.05, rejB=0.05;
      Double_t tmpEffS, tmpEffB, tmpRejS, tmpRejB;
      // ...
      for(UInt_t iev = 1; iev < bdtEventSample.size(); iev++) {
         // ...
         nSelS = bdtEventSample[iev].GetCumulativeWeight(true);
         nSelB = bdtEventSample[iev].GetCumulativeWeight(false);
         // ...
         tmpEffS = nSelS/nTotS;
         tmpEffB = nSelB/nTotB;
         // ...
         if      (nSelS==0     && tmpEffB>effB)  {effB=tmpEffB; fLowBkgCut[ivar]  = bdtEventSample[iev].GetVal() - dVal; fIsLowBkgCut[ivar]=kTRUE;}
         else if (nSelB==0     && tmpEffS>effS)  {effS=tmpEffS; fLowSigCut[ivar]  = bdtEventSample[iev].GetVal() - dVal; fIsLowSigCut[ivar]=kTRUE;}
         else if (nSelB==nTotB && tmpRejS>rejS)  {rejS=tmpRejS; fHighSigCut[ivar] = bdtEventSample[iev].GetVal() + dVal; fIsHighSigCut[ivar]=kTRUE;}
         else if (nSelS==nTotS && tmpRejB>rejB)  {rejB=tmpRejB; fHighBkgCut[ivar] = bdtEventSample[iev].GetVal() + dVal; fIsHighBkgCut[ivar]=kTRUE;}
      }
   }
   // ...
   Log() << kDEBUG << " \tfound and suggest the following possible pre-selection cuts " << Endl;
   if (fDoPreselection) Log() << kDEBUG << "\tthe training will be done after these cuts... and GetMVA value returns +1, (-1) for a signal (bkg) event that passes these cuts" << Endl;
   else  Log() << kDEBUG << "\tas option DoPreselection was not used, these cuts however will not be performed, but the training will see the full sample" << Endl;
   for (UInt_t ivar=0; ivar < GetNvar(); ivar++ ) {
      if (fIsLowBkgCut[ivar]){
         Log() << kDEBUG  << " \tfound cut: Bkg if var " << ivar << " < "  << fLowBkgCut[ivar] << Endl;
      }
      if (fIsLowSigCut[ivar]){
         Log() << kDEBUG  << " \tfound cut: Sig if var " << ivar << " < "  << fLowSigCut[ivar] << Endl;
      }
      if (fIsHighBkgCut[ivar]){
         Log() << kDEBUG  << " \tfound cut: Bkg if var " << ivar << " > "  << fHighBkgCut[ivar] << Endl;
      }
      if (fIsHighSigCut[ivar]){
         Log() << kDEBUG  << " \tfound cut: Sig if var " << ivar << " > "  << fHighSigCut[ivar] << Endl;
      }
   }
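   // Annotation (not part of the original source): for every variable the events are sorted and
   // the cumulative signal/background weights are scanned. If, say, everything below some value is
   // pure background containing more than 5% of the background (tmpEffB > 0.05), a "low background"
   // cut is proposed just below that value (GetVal() - dVal, with dVal = 1% of the variable range),
   // and analogously for the other three cases. ApplyPreselectionCuts (below) then returns -1 (+1)
   // directly for events falling into such a pure background (signal) region, without evaluating
   // the forest.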
 
   for (UInt_t ivar=0; ivar < GetNvar(); ivar++ ) {
      if (fIsLowBkgCut[ivar]){
         if (ev->GetValue(ivar) < fLowBkgCut[ivar]) result = -1;
      }
      if (fIsLowSigCut[ivar]){
         if (ev->GetValue(ivar) < fLowSigCut[ivar]) result =  1;
      }
      if (fIsHighBkgCut[ivar]){
         if (ev->GetValue(ivar) > fHighBkgCut[ivar]) result = -1;
      }
      if (fIsHighSigCut[ivar]){
         if (ev->GetValue(ivar) > fHighSigCut[ivar]) result =  1;
      }
   }
 