// @(#)root/tmva $Id$
// Author: Alexander Voigt

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Classes: PDEFoamDecisionTree                                                   *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation of decision tree like PDEFoam                              *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Tancredi Carli   - CERN, Switzerland                                      *
 *      Dominik Dannheim - CERN, Switzerland                                      *
 *      S. Jadach        - Institute of Nuclear Physics, Cracow, Poland           *
 *      Alexander Voigt  - TU Dresden, Germany                                    *
 *      Peter Speckmayer - CERN, Switzerland                                      *
 *                                                                                *
 * Copyright (c) 2010:                                                            *
 *      CERN, Switzerland                                                         *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

//_____________________________________________________________________
//
// PDEFoamDecisionTree
//
// This PDEFoam variant acts like a decision tree and stores in every
// cell the discriminant
//
//    D = #events with given class / total number of events
//
// as well as the statistical error on the discriminant.  It therefore
// acts as a discriminant estimator.  The decision tree-like behaviour
// is achieved by overriding PDEFoamDiscriminant::Explore() to use a
// decision tree-like cell splitting algorithm (given a separation
// type).
//
// This PDEFoam variant should be booked together with the
// PDEFoamDecisionTreeDensity density estimator, which returns the
// events in a cell without sampling.
//
//_____________________________________________________________________

#ifndef ROOT_TMVA_PDEFoamDecisionTree
#include "TMVA/PDEFoamDecisionTree.h"
#endif
#ifndef ROOT_TMVA_PDEFoamDecisionTreeDensity
#include "TMVA/PDEFoamDecisionTreeDensity.h"
#endif

ClassImp(TMVA::PDEFoamDecisionTree)

//_____________________________________________________________________
TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree()
   : PDEFoamDiscriminant()
   , fSepType(NULL)
{
   // Default constructor for streamer, user should not use it.
}

//_____________________________________________________________________
TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree(const TString& name, SeparationBase *sepType, UInt_t cls)
   : PDEFoamDiscriminant(name, cls)
   , fSepType(sepType)
{
   // Parameters:
   //
   // - name - name of the foam
   //
   // - sepType - separation type used for the cell splitting (will be
   //   deleted in the destructor)
   //
   // - cls - class to consider as signal when calcualting the purity
}

//_____________________________________________________________________
TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree(const PDEFoamDecisionTree &from)
   : PDEFoamDiscriminant(from)
   , fSepType(from.fSepType)
{
   // Copy Constructor  NOT IMPLEMENTED (NEVER USED)
   Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
}

//_____________________________________________________________________
TMVA::PDEFoamDecisionTree::~PDEFoamDecisionTree()
{
   // Destructor
   // deletes fSepType
   if (fSepType)
      delete fSepType;
}

//_____________________________________________________________________
void TMVA::PDEFoamDecisionTree::Explore(PDEFoamCell *cell)
{
   // Internal subprogram used by Create.  It explores newly defined
   // cell with according to the decision tree logic.  The separation
   // set via the 'sepType' option in the constructor.
   //
   // The optimal division point for eventual future cell division is
   // determined/recorded.  Note that links to parents and initial
   // volume = 1/2 parent has to be already defined prior to calling
   // this routine.
   //
   // Note, that according to the decision tree logic, a cell is only
   // split, if the number of (unweighted) events in each dautghter
   // cell is greater than fNmin.

   if (!cell)
      Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;

   // create edge histograms
   std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
   hsig.reserve(fDim);
   hbkg.reserve(fDim);
   hsig_unw.reserve(fDim);
   hbkg_unw.reserve(fDim);
   for (Int_t idim = 0; idim < fDim; idim++) {
      hsig.push_back(new TH1D(Form("hsig_%i", idim),
                              Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
      hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
                              Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
      hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
                                  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
      hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
                                  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
   }

   // get cell position and size
   PDEFoamVect  cellSize(GetTotDim()), cellPosi(GetTotDim());
   cell->GetHcub(cellPosi, cellSize);

   // determine lower and upper cell bound
   std::vector<Double_t> lb(GetTotDim()); // lower bound
   std::vector<Double_t> ub(GetTotDim()); // upper bound
   for (Int_t idim = 0; idim < GetTotDim(); idim++) {
      lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
      ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
   }

   // fDistr must be of type PDEFoamDecisionTreeDensity*
   PDEFoamDecisionTreeDensity *distr = dynamic_cast<PDEFoamDecisionTreeDensity*>(fDistr);
   if (distr == NULL)
      Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
            << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;

   // create TMVA::Volume object needed for searching within the BST
   TMVA::Volume volume(&lb, &ub);

   // fill the signal and background histograms for the given volume
   distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);

   // ------ determine the best division edge
   Double_t xBest = 0.5;    // best division point
   Int_t    kBest = -1;     // best split dimension
   Double_t maxGain = -1.0; // maximum gain
   Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
   Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
   Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
   Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);

   for (Int_t idim = 0; idim < fDim; ++idim) {
      Double_t nSelS = hsig.at(idim)->GetBinContent(0);
      Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
      Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
      Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
      for (Int_t jLo = 1; jLo < fNBin; jLo++) {
         nSelS += hsig.at(idim)->GetBinContent(jLo);
         nSelB += hbkg.at(idim)->GetBinContent(jLo);
         nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
         nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);

         // proceed if total number of events in left and right cell
         // is greater than fNmin
         if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
               (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
            continue;

         Double_t xLo = 1.0 * jLo / fNBin;

         // calculate separation gain
         Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);

         if (gain >= maxGain) {
            maxGain = gain;
            xBest   = xLo;
            kBest   = idim;
         }
      } // jLo
   } // idim

   if (kBest >= fDim || kBest < 0) {
      // No best division edge found!  One must ensure, that this cell
      // is not chosen for splitting in PeekMax().  But since in
      // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
      // should set maxGain to -1.0 (or even 0.0?) here.
      maxGain = -1.0;
   }

   // set cell properties
   cell->SetBest(kBest);
   cell->SetXdiv(xBest);
   if (nTotB + nTotS > 0)
      cell->SetIntg(nTotS / (nTotB + nTotS));
   else
      cell->SetIntg(0.0);
   cell->SetDriv(maxGain);
   cell->CalcVolume();

   // set cell element 0 (total number of events in cell) during
   // build-up
   if (GetNmin() > 0)
      SetCellElement(cell, 0, nTotS + nTotB);

   // clean up
   for (UInt_t ih = 0; ih < hsig.size(); ih++)  delete hsig.at(ih);
   for (UInt_t ih = 0; ih < hbkg.size(); ih++)  delete hbkg.at(ih);
   for (UInt_t ih = 0; ih < hsig_unw.size(); ih++)  delete hsig_unw.at(ih);
   for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++)  delete hbkg_unw.at(ih);
}
 PDEFoamDecisionTree.cxx:1
 PDEFoamDecisionTree.cxx:2
 PDEFoamDecisionTree.cxx:3
 PDEFoamDecisionTree.cxx:4
 PDEFoamDecisionTree.cxx:5
 PDEFoamDecisionTree.cxx:6
 PDEFoamDecisionTree.cxx:7
 PDEFoamDecisionTree.cxx:8
 PDEFoamDecisionTree.cxx:9
 PDEFoamDecisionTree.cxx:10
 PDEFoamDecisionTree.cxx:11
 PDEFoamDecisionTree.cxx:12
 PDEFoamDecisionTree.cxx:13
 PDEFoamDecisionTree.cxx:14
 PDEFoamDecisionTree.cxx:15
 PDEFoamDecisionTree.cxx:16
 PDEFoamDecisionTree.cxx:17
 PDEFoamDecisionTree.cxx:18
 PDEFoamDecisionTree.cxx:19
 PDEFoamDecisionTree.cxx:20
 PDEFoamDecisionTree.cxx:21
 PDEFoamDecisionTree.cxx:22
 PDEFoamDecisionTree.cxx:23
 PDEFoamDecisionTree.cxx:24
 PDEFoamDecisionTree.cxx:25
 PDEFoamDecisionTree.cxx:26
 PDEFoamDecisionTree.cxx:27
 PDEFoamDecisionTree.cxx:28
 PDEFoamDecisionTree.cxx:29
 PDEFoamDecisionTree.cxx:30
 PDEFoamDecisionTree.cxx:31
 PDEFoamDecisionTree.cxx:32
 PDEFoamDecisionTree.cxx:33
 PDEFoamDecisionTree.cxx:34
 PDEFoamDecisionTree.cxx:35
 PDEFoamDecisionTree.cxx:36
 PDEFoamDecisionTree.cxx:37
 PDEFoamDecisionTree.cxx:38
 PDEFoamDecisionTree.cxx:39
 PDEFoamDecisionTree.cxx:40
 PDEFoamDecisionTree.cxx:41
 PDEFoamDecisionTree.cxx:42
 PDEFoamDecisionTree.cxx:43
 PDEFoamDecisionTree.cxx:44
 PDEFoamDecisionTree.cxx:45
 PDEFoamDecisionTree.cxx:46
 PDEFoamDecisionTree.cxx:47
 PDEFoamDecisionTree.cxx:48
 PDEFoamDecisionTree.cxx:49
 PDEFoamDecisionTree.cxx:50
 PDEFoamDecisionTree.cxx:51
 PDEFoamDecisionTree.cxx:52
 PDEFoamDecisionTree.cxx:53
 PDEFoamDecisionTree.cxx:54
 PDEFoamDecisionTree.cxx:55
 PDEFoamDecisionTree.cxx:56
 PDEFoamDecisionTree.cxx:57
 PDEFoamDecisionTree.cxx:58
 PDEFoamDecisionTree.cxx:59
 PDEFoamDecisionTree.cxx:60
 PDEFoamDecisionTree.cxx:61
 PDEFoamDecisionTree.cxx:62
 PDEFoamDecisionTree.cxx:63
 PDEFoamDecisionTree.cxx:64
 PDEFoamDecisionTree.cxx:65
 PDEFoamDecisionTree.cxx:66
 PDEFoamDecisionTree.cxx:67
 PDEFoamDecisionTree.cxx:68
 PDEFoamDecisionTree.cxx:69
 PDEFoamDecisionTree.cxx:70
 PDEFoamDecisionTree.cxx:71
 PDEFoamDecisionTree.cxx:72
 PDEFoamDecisionTree.cxx:73
 PDEFoamDecisionTree.cxx:74
 PDEFoamDecisionTree.cxx:75
 PDEFoamDecisionTree.cxx:76
 PDEFoamDecisionTree.cxx:77
 PDEFoamDecisionTree.cxx:78
 PDEFoamDecisionTree.cxx:79
 PDEFoamDecisionTree.cxx:80
 PDEFoamDecisionTree.cxx:81
 PDEFoamDecisionTree.cxx:82
 PDEFoamDecisionTree.cxx:83
 PDEFoamDecisionTree.cxx:84
 PDEFoamDecisionTree.cxx:85
 PDEFoamDecisionTree.cxx:86
 PDEFoamDecisionTree.cxx:87
 PDEFoamDecisionTree.cxx:88
 PDEFoamDecisionTree.cxx:89
 PDEFoamDecisionTree.cxx:90
 PDEFoamDecisionTree.cxx:91
 PDEFoamDecisionTree.cxx:92
 PDEFoamDecisionTree.cxx:93
 PDEFoamDecisionTree.cxx:94
 PDEFoamDecisionTree.cxx:95
 PDEFoamDecisionTree.cxx:96
 PDEFoamDecisionTree.cxx:97
 PDEFoamDecisionTree.cxx:98
 PDEFoamDecisionTree.cxx:99
 PDEFoamDecisionTree.cxx:100
 PDEFoamDecisionTree.cxx:101
 PDEFoamDecisionTree.cxx:102
 PDEFoamDecisionTree.cxx:103
 PDEFoamDecisionTree.cxx:104
 PDEFoamDecisionTree.cxx:105
 PDEFoamDecisionTree.cxx:106
 PDEFoamDecisionTree.cxx:107
 PDEFoamDecisionTree.cxx:108
 PDEFoamDecisionTree.cxx:109
 PDEFoamDecisionTree.cxx:110
 PDEFoamDecisionTree.cxx:111
 PDEFoamDecisionTree.cxx:112
 PDEFoamDecisionTree.cxx:113
 PDEFoamDecisionTree.cxx:114
 PDEFoamDecisionTree.cxx:115
 PDEFoamDecisionTree.cxx:116
 PDEFoamDecisionTree.cxx:117
 PDEFoamDecisionTree.cxx:118
 PDEFoamDecisionTree.cxx:119
 PDEFoamDecisionTree.cxx:120
 PDEFoamDecisionTree.cxx:121
 PDEFoamDecisionTree.cxx:122
 PDEFoamDecisionTree.cxx:123
 PDEFoamDecisionTree.cxx:124
 PDEFoamDecisionTree.cxx:125
 PDEFoamDecisionTree.cxx:126
 PDEFoamDecisionTree.cxx:127
 PDEFoamDecisionTree.cxx:128
 PDEFoamDecisionTree.cxx:129
 PDEFoamDecisionTree.cxx:130
 PDEFoamDecisionTree.cxx:131
 PDEFoamDecisionTree.cxx:132
 PDEFoamDecisionTree.cxx:133
 PDEFoamDecisionTree.cxx:134
 PDEFoamDecisionTree.cxx:135
 PDEFoamDecisionTree.cxx:136
 PDEFoamDecisionTree.cxx:137
 PDEFoamDecisionTree.cxx:138
 PDEFoamDecisionTree.cxx:139
 PDEFoamDecisionTree.cxx:140
 PDEFoamDecisionTree.cxx:141
 PDEFoamDecisionTree.cxx:142
 PDEFoamDecisionTree.cxx:143
 PDEFoamDecisionTree.cxx:144
 PDEFoamDecisionTree.cxx:145
 PDEFoamDecisionTree.cxx:146
 PDEFoamDecisionTree.cxx:147
 PDEFoamDecisionTree.cxx:148
 PDEFoamDecisionTree.cxx:149
 PDEFoamDecisionTree.cxx:150
 PDEFoamDecisionTree.cxx:151
 PDEFoamDecisionTree.cxx:152
 PDEFoamDecisionTree.cxx:153
 PDEFoamDecisionTree.cxx:154
 PDEFoamDecisionTree.cxx:155
 PDEFoamDecisionTree.cxx:156
 PDEFoamDecisionTree.cxx:157
 PDEFoamDecisionTree.cxx:158
 PDEFoamDecisionTree.cxx:159
 PDEFoamDecisionTree.cxx:160
 PDEFoamDecisionTree.cxx:161
 PDEFoamDecisionTree.cxx:162
 PDEFoamDecisionTree.cxx:163
 PDEFoamDecisionTree.cxx:164
 PDEFoamDecisionTree.cxx:165
 PDEFoamDecisionTree.cxx:166
 PDEFoamDecisionTree.cxx:167
 PDEFoamDecisionTree.cxx:168
 PDEFoamDecisionTree.cxx:169
 PDEFoamDecisionTree.cxx:170
 PDEFoamDecisionTree.cxx:171
 PDEFoamDecisionTree.cxx:172
 PDEFoamDecisionTree.cxx:173
 PDEFoamDecisionTree.cxx:174
 PDEFoamDecisionTree.cxx:175
 PDEFoamDecisionTree.cxx:176
 PDEFoamDecisionTree.cxx:177
 PDEFoamDecisionTree.cxx:178
 PDEFoamDecisionTree.cxx:179
 PDEFoamDecisionTree.cxx:180
 PDEFoamDecisionTree.cxx:181
 PDEFoamDecisionTree.cxx:182
 PDEFoamDecisionTree.cxx:183
 PDEFoamDecisionTree.cxx:184
 PDEFoamDecisionTree.cxx:185
 PDEFoamDecisionTree.cxx:186
 PDEFoamDecisionTree.cxx:187
 PDEFoamDecisionTree.cxx:188
 PDEFoamDecisionTree.cxx:189
 PDEFoamDecisionTree.cxx:190
 PDEFoamDecisionTree.cxx:191
 PDEFoamDecisionTree.cxx:192
 PDEFoamDecisionTree.cxx:193
 PDEFoamDecisionTree.cxx:194
 PDEFoamDecisionTree.cxx:195
 PDEFoamDecisionTree.cxx:196
 PDEFoamDecisionTree.cxx:197
 PDEFoamDecisionTree.cxx:198
 PDEFoamDecisionTree.cxx:199
 PDEFoamDecisionTree.cxx:200
 PDEFoamDecisionTree.cxx:201
 PDEFoamDecisionTree.cxx:202
 PDEFoamDecisionTree.cxx:203
 PDEFoamDecisionTree.cxx:204
 PDEFoamDecisionTree.cxx:205
 PDEFoamDecisionTree.cxx:206
 PDEFoamDecisionTree.cxx:207
 PDEFoamDecisionTree.cxx:208
 PDEFoamDecisionTree.cxx:209
 PDEFoamDecisionTree.cxx:210
 PDEFoamDecisionTree.cxx:211
 PDEFoamDecisionTree.cxx:212
 PDEFoamDecisionTree.cxx:213
 PDEFoamDecisionTree.cxx:214
 PDEFoamDecisionTree.cxx:215
 PDEFoamDecisionTree.cxx:216
 PDEFoamDecisionTree.cxx:217
 PDEFoamDecisionTree.cxx:218
 PDEFoamDecisionTree.cxx:219
 PDEFoamDecisionTree.cxx:220
 PDEFoamDecisionTree.cxx:221
 PDEFoamDecisionTree.cxx:222
 PDEFoamDecisionTree.cxx:223
 PDEFoamDecisionTree.cxx:224
 PDEFoamDecisionTree.cxx:225
 PDEFoamDecisionTree.cxx:226
 PDEFoamDecisionTree.cxx:227