// @(#)root/tmva $Id$
// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Jan Therhaag, Jiahang Zhong

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : MethodMLP                                                             *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      ANN Multilayer Perceptron  class for the discrimination of signal         *
 *      from background.  BFGS implementation based on TMultiLayerPerceptron      *
 *      class from ROOT (http://root.cern.ch).                                    *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Krzysztof Danielowski <danielow@cern.ch>       - IFJ & AGH, Poland        *
 *      Andreas Hoecker       <Andreas.Hocker@cern.ch> - CERN, Switzerland        *
 *      Matt Jachowski        <jachowski@stanford.edu> - Stanford University, USA *
 *      Kamil Kraszewski      <kalq@cern.ch>           - IFJ & UJ, Poland         *
 *      Maciej Kruk           <mkruk@cern.ch>          - IFJ & AGH, Poland        *
 *      Peter Speckmayer      <peter.speckmayer@cern.ch> - CERN, Switzerland      *
 *      Joerg Stelzer         <stelzer@cern.ch>        - DESY, Germany            *
 *      Jan Therhaag          <Jan.Therhaag@cern.ch>   - U of Bonn, Germany       *
 *      Eckhard v. Toerne     <evt@uni-bonn.de>        - U of Bonn, Germany       *
 *      Jiahang Zhong         <Jiahang.Zhong@cern.ch>  - Academia Sinica, Taipei  *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodMLP
#define ROOT_TMVA_MethodMLP

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodMLP                                                            //
//                                                                      //
// Multilayer Perceptron derived from MethodANNBase                     //
//                                                                      //
//////////////////////////////////////////////////////////////////////////
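
// Illustrative usage sketch (not part of this header; the file name, variable
// names and the exact option string are placeholders -- see the TMVA Users
// Guide for the full list of MLP options):
//
//    TFile* outputFile = TFile::Open( "TMVA.root", "RECREATE" );
//    TMVA::Factory factory( "TMVAClassification", outputFile, "!V" );
//    factory.AddVariable( "var1", 'F' );
//    factory.AddVariable( "var2", 'F' );
//    // ... register signal/background trees and prepare the samples ...
//    factory.BookMethod( TMVA::Types::kMLP, "MLP",
//                        "H:!V:NeuronType=tanh:NCycles=600:HiddenLayers=N+5:"
//                        "TrainingMethod=BP:TestRate=5" );
//    factory.TrainAllMethods();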

#include <vector>
#ifndef ROOT_TString
#include "TString.h"
#endif
#ifndef ROOT_TTree
#include "TTree.h"
#endif
#ifndef ROOT_TObjArray
#include "TObjArray.h"
#endif
#ifndef ROOT_TRandom3
#include "TRandom3.h"
#endif
#ifndef ROOT_TH1F
#include "TH1F.h"
#endif
#ifndef ROOT_TMatrixDfwd
#include "TMatrixDfwd.h"
#endif

#ifndef ROOT_TMVA_IFitterTarget
#include "TMVA/IFitterTarget.h"
#endif
#ifndef ROOT_TMVA_MethodBase
#include "TMVA/MethodBase.h"
#endif
#ifndef ROOT_TMVA_MethodANNBase
#include "TMVA/MethodANNBase.h"
#endif
#ifndef ROOT_TMVA_TNeuron
#include "TMVA/TNeuron.h"
#endif
#ifndef ROOT_TMVA_TActivation
#include "TMVA/TActivation.h"
#endif
#ifndef ROOT_TMVA_ConvergenceTest
#include "TMVA/ConvergenceTest.h"
#endif

#define MethodMLP_UseMinuit__
#undef  MethodMLP_UseMinuit__
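// the macro is defined and then immediately undefined: the Minuit-based code
// guarded by it below is compiled out by default; remove the #undef to re-enable it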

namespace TMVA {

   class MethodMLP : public MethodANNBase, public IFitterTarget, public ConvergenceTest {

   public:

      // standard constructors
      MethodMLP( const TString& jobName,
                 const TString& methodTitle,
                 DataSetInfo& theData,
                 const TString& theOption,
                 TDirectory* theTargetDir = 0 );

      MethodMLP( DataSetInfo& theData,
                 const TString& theWeightFile,
                 TDirectory* theTargetDir = 0 );

      virtual ~MethodMLP();

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );

      void Train() { Train(NumCycles()); }

      // objective function for fitter-based training (e.g. the genetic algorithm)
      Double_t ComputeEstimator ( std::vector<Double_t>& parameters );
      Double_t EstimatorFunction( std::vector<Double_t>& parameters );

      enum ETrainingMethod { kBP=0, kBFGS, kGA };
      enum EBPTrainingMode { kSequential=0, kBatch };

      bool     HasInverseHessian() { return fCalculateErrors; }
      Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper=0 );
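
      // illustrative application-side sketch (weight-file path and variable
      // names are placeholders):
      //    TMVA::Reader reader( "!Color:!Silent" );
      //    Float_t var1, var2;
      //    reader.AddVariable( "var1", &var1 );
      //    reader.AddVariable( "var2", &var2 );
      //    reader.BookMVA( "MLP", "weights/TMVAClassification_MLP.weights.xml" );
      //    Double_t mvaValue = reader.EvaluateMVA( "MLP" );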

   protected:

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      // get help message text
      void GetHelpMessage() const;


   private:

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();

      // general helper functions
      void     Train( Int_t nEpochs );
      void     Init();
      void     InitializeLearningRates(); // initialize learning rates of synapses (only needed by backpropagation)

      // used as a measure of success in all minimization techniques
      Double_t CalculateEstimator( Types::ETreeType treeType = Types::kTraining, Int_t iEpoch = -1 );
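      // (schematically, the MSE estimator is E ~ sum_i w_i (y_i - t_i)^2 and the
      // cross-entropy estimator is E ~ -sum_i w_i [ t_i ln y_i + (1-t_i) ln(1-y_i) ],
      // with network output y, desired output t and event weight w)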

      // BFGS functions
      void     BFGSMinimize( Int_t nEpochs );
      void     SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &Buffer );
      void     SteepestDir( TMatrixD &Dir );
      Bool_t   GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta );
      void     SetDir( TMatrixD &Hessian, TMatrixD &Dir );
      Double_t DerivDir( TMatrixD &Dir );
      Bool_t   LineSearch( TMatrixD &Dir, std::vector<Double_t> &Buffer, Double_t* dError=0 ); //zjh
      void     ComputeDEDw();
      void     SimulateEvent( const Event* ev );
      void     SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha );
      Double_t GetError();
      Double_t GetMSEErr( const Event* ev, UInt_t index = 0 );   //zjh
      Double_t GetCEErr( const Event* ev, UInt_t index = 0 );   //zjh
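
      // schematic BFGS epoch built from the functions above (pseudocode, not a
      // literal transcription of the implementation):
      //    ComputeDEDw();                          // gradient dE/dw over the training sample
      //    SetGammaDelta( Gamma, Delta, buffer );  // Gamma: change of gradient, Delta: change of weights
      //    update the approximate (inverse) Hessian from Gamma and Delta (GetHessian);
      //    on failure fall back to SteepestDir( Dir ), otherwise SetDir( Hessian, Dir );
      //    LineSearch( Dir, buffer );              // pick the step length along Dir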

      // backpropagation functions
      void     BackPropagationMinimize( Int_t nEpochs );
      void     TrainOneEpoch();
      void     Shuffle( Int_t* index, Int_t n );
      void     DecaySynapseWeights( Bool_t lateEpoch );
      void     TrainOneEvent( Int_t ievt );
      Double_t GetDesiredOutput( const Event* ev );
      void     UpdateNetwork( Double_t desired, Double_t eventWeight=1.0 );
      void     UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight=1.0);
      void     CalculateNeuronDeltas();
      void     UpdateSynapses();
      void     AdjustSynapseWeights();
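
      // schematically, each sequential-mode update is a gradient-descent step,
      //    w <- w - fLearnRate * dE/dw,
      // with fLearnRate decayed via fDecayRate; in batch mode the gradients of
      // fBatchSize events are accumulated before the weights are adjusted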

      // faster backpropagation
      void     TrainOneEventFast( Int_t ievt, Float_t*& branchVar, Int_t& type );

      // genetic algorithm functions
      void     GeneticMinimize();

#ifdef MethodMLP_UseMinuit__
      // Minuit functions -- disabled by default (the MethodMLP_UseMinuit__ guard is undefined above) because they rely on a static this-pointer
      void MinuitMinimize();
      static MethodMLP* GetThisPtr();
      static void IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
      void FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
#endif

      // general
      bool               fUseRegulator;         // zjh
      bool               fCalculateErrors;      // compute inverse hessian matrix at the end of the training
      Double_t           fPrior;                // zjh
      std::vector<Double_t> fPriorDev;          // zjh
      void               GetApproxInvHessian( TMatrixD& InvHessian, bool regulate=true );   // rank-1 approximation, neglects 2nd derivatives //zjh
      void               UpdateRegulators();    // zjh
      void               UpdatePriors();        // zjh
      Int_t              fUpdateLimit;          // zjh

      ETrainingMethod fTrainingMethod; // training method: BP, BFGS, or GA
      TString         fTrainMethodS;   // training method option param

      Float_t         fSamplingFraction;  // fraction of events that is sampled for training
      Float_t         fSamplingEpoch;     // fraction of epochs where sampling is used
      Float_t         fSamplingWeight;    // factor by which event weights are changed when sampling is on
      Bool_t          fSamplingTraining;  // The training sample is sampled
      Bool_t          fSamplingTesting;   // The testing sample is sampled

      // BFGS variables
      Double_t        fLastAlpha;      // line search variable
      Double_t        fTau;            // line search variable
      Int_t           fResetStep;      // reset period (how often the approximate Hessian matrix is cleared)

      // backpropagation variable
      Double_t        fLearnRate;      // learning rate for synapse weight adjustments
      Double_t        fDecayRate;      // decay rate for above learning rate
      EBPTrainingMode fBPMode;         // backprop learning mode (sequential or batch)
      TString         fBpModeS;        // backprop learning mode option string (sequential or batch)
      Int_t           fBatchSize;      // batch size, only matters if in batch learning mode
      Int_t           fTestRate;       // overtraining test performed at every #th epoch
      Bool_t          fEpochMon;       // create and fill epoch-wise monitoring histograms (makes the output file big!)

      // genetic algorithm variables
      Int_t           fGA_nsteps;      // GA settings: number of steps
      Int_t           fGA_preCalc;     // GA settings: number of pre-calc steps
      Int_t           fGA_SC_steps;    // GA settings: SC_steps
      Int_t           fGA_SC_rate;     // GA settings: SC_rate
      Double_t        fGA_SC_factor;   // GA settings: SC_factor

      // regression, storage of deviations
      std::vector<std::pair<Float_t,Float_t> >* fDeviationsFromTargets; // pairs of (deviation from target, event weight) for regression

      Float_t         fWeightRange;    // suppress outliers for the estimator calculation

#ifdef MethodMLP_UseMinuit__
      // Minuit variables -- disabled by default for the same reason as the functions above (static this-pointer)
      Int_t          fNumberOfWeights; // Minuit: number of weights
      static MethodMLP* fgThis;        // Minuit: this pointer
#endif

      // debugging flags
      static const Int_t  fgPRINT_ESTIMATOR_INC = 10;     // debug: print the estimator every #th epoch
      static const Bool_t fgPRINT_SEQ           = kFALSE; // debug: print sequential-mode information
      static const Bool_t fgPRINT_BATCH         = kFALSE; // debug: print batch-mode information

      ClassDef(MethodMLP,0) // Multi-layer perceptron implemented specifically for TMVA
   };

} // namespace TMVA

#endif