MethodMLP.cxx
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 /*! \class TMVA::MethodMLP
39 \ingroup TMVA
40 
41 Multilayer Perceptron class built on top of MethodANNBase
42 
43 */
44 
45 #include "TMVA/MethodMLP.h"
46 
47 #include "TMVA/Config.h"
48 #include "TMVA/Configurable.h"
49 #include "TMVA/ConvergenceTest.h"
50 #include "TMVA/ClassifierFactory.h"
51 #include "TMVA/DataSet.h"
52 #include "TMVA/DataSetInfo.h"
53 #include "TMVA/FitterBase.h"
54 #include "TMVA/GeneticFitter.h"
55 #include "TMVA/IFitterTarget.h"
56 #include "TMVA/IMethod.h"
57 #include "TMVA/Interval.h"
58 #include "TMVA/MethodANNBase.h"
59 #include "TMVA/MsgLogger.h"
60 #include "TMVA/TNeuron.h"
61 #include "TMVA/TSynapse.h"
62 #include "TMVA/Timer.h"
63 #include "TMVA/Tools.h"
64 #include "TMVA/Types.h"
65 
66 #include "TH1.h"
67 #include "TString.h"
68 #include "TFitter.h"
69 #include "TMatrixD.h"
70 #include "TMath.h"
71 
72 #include <iostream>
73 #include <cmath>
74 #include <vector>
75 
76 #ifdef MethodMLP_UseMinuit__
77 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
78 Bool_t MethodMLP_UseMinuit = kTRUE;
79 #endif
80 
81 REGISTER_METHOD(MLP)
82 
83 ClassImp(TMVA::MethodMLP);
84 
85  using std::vector;
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// standard constructor
89 
90 TMVA::MethodMLP::MethodMLP( const TString& jobName,
91  const TString& methodTitle,
92  DataSetInfo& theData,
93  const TString& theOption)
94  : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
95  fUseRegulator(false), fCalculateErrors(false),
96  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
97  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
98  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
99  fSamplingTraining(false), fSamplingTesting(false),
100  fLastAlpha(0.0), fTau(0.),
101  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
102  fBPMode(kSequential), fBpModeS("None"),
103  fBatchSize(0), fTestRate(0), fEpochMon(false),
104  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
105  fGA_SC_rate(0), fGA_SC_factor(0.0),
106  fDeviationsFromTargets(0),
107  fWeightRange (1.0)
108 {
109 
110 }
111 
112 ////////////////////////////////////////////////////////////////////////////////
113 /// constructor from a weight file
114 
115 TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
116  const TString& theWeightFile)
117  : MethodANNBase( Types::kMLP, theData, theWeightFile),
118  fUseRegulator(false), fCalculateErrors(false),
119  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
120  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
121  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
122  fSamplingTraining(false), fSamplingTesting(false),
123  fLastAlpha(0.0), fTau(0.),
124  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
125  fBPMode(kSequential), fBpModeS("None"),
126  fBatchSize(0), fTestRate(0), fEpochMon(false),
127  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
128  fGA_SC_rate(0), fGA_SC_factor(0.0),
129  fDeviationsFromTargets(0),
130  fWeightRange (1.0)
131 {
132 }
133 
134 ////////////////////////////////////////////////////////////////////////////////
135 /// destructor
136 /// nothing to be done
137 
138 TMVA::MethodMLP::~MethodMLP()
139 {
140 }
141 
142 void TMVA::MethodMLP::Train()
143 {
144  Train(NumCycles());
145 }
146 
147 
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 /// MLP can handle classification with 2 classes, multiclass classification, and regression with one regression-target
151 
152 Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
153 {
154  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
155  if (type == Types::kMulticlass ) return kTRUE;
156  if (type == Types::kRegression ) return kTRUE;
157 
158  return kFALSE;
159 }
160 
161 ////////////////////////////////////////////////////////////////////////////////
162 /// default initializations
163 
164 void TMVA::MethodMLP::Init()
165 {
166  // the minimum requirement to declare an event signal-like
167  SetSignalReferenceCut( 0.5 );
168 #ifdef MethodMLP_UseMinuit__
169  fgThis = this;
170 #endif
171 }
172 
173 ////////////////////////////////////////////////////////////////////////////////
174 /// define the options (their key words) that can be set in the option string
175 ///
176 /// known options:
177 ///
178 /// - TrainingMethod <string> Training method
179 /// available values are:
180 /// - BP Back-Propagation <default>
181 /// - GA Genetic Algorithm (takes a LONG time)
182 ///
183 /// - LearningRate <float> NN learning rate parameter
184 /// - DecayRate <float> Decay rate for learning parameter
185 /// - TestRate <int> Test for overtraining performed at each #th epochs
186 ///
187 /// - BPMode <string> Back-propagation learning mode
188 /// available values are:
189 /// - sequential <default>
190 /// - batch
191 ///
192 /// - BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
193 /// - -1 for BatchSize=number_of_events
194 
195 void TMVA::MethodMLP::DeclareOptions()
196 {
197  DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
198  "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
199  AddPreDefVal(TString("BP"));
200  AddPreDefVal(TString("GA"));
201  AddPreDefVal(TString("BFGS"));
202 
203  DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
204  DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
205  DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
206  DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
207 
208  DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
209  DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
210  DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance","The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied with SamplingImportance, otherwise they are divided.");
211 
212  DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
213  DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
214 
215  DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
216  DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
217 
218  DeclareOptionRef(fBpModeS="sequential", "BPMode",
219  "Back-propagation learning mode: sequential or batch");
220  AddPreDefVal(TString("sequential"));
221  AddPreDefVal(TString("batch"));
222 
223  DeclareOptionRef(fBatchSize=-1, "BatchSize",
224  "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
225 
226  DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
227  "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
228 
229  DeclareOptionRef(fSteps=-1, "ConvergenceTests",
230  "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
231 
232  DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
233  "Use regulator to avoid over-training"); //zjh
234  DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
235  "Maximum times of regulator update"); //zjh
236  DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
237  "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
238 
239  DeclareOptionRef(fWeightRange=1.0, "WeightRange",
240  "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
241 
242 }
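// Illustrative usage sketch (not part of the original file): the options declared
// above are passed as a single configuration string when the MLP is booked through
// the TMVA Factory. The function and object names below ('BookMLPExample',
// 'factory', 'dataloader') are placeholders for code prepared by the user.
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"

void BookMLPExample(TMVA::Factory* factory, TMVA::DataLoader* dataloader)
{
   // book an MLP classifier with back-propagation replaced by BFGS training
   factory->BookMethod(dataloader, TMVA::Types::kMLP, "MLP",
                       "!H:!V:NCycles=600:HiddenLayers=N+5:TrainingMethod=BFGS:"
                       "LearningRate=0.02:DecayRate=0.01:TestRate=10:"
                       "BPMode=sequential:ConvergenceTests=10:!UseRegulator");
}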
243 
244 ////////////////////////////////////////////////////////////////////////////////
245 /// process user options
246 
247 void TMVA::MethodMLP::ProcessOptions()
248 {
249  MethodANNBase::ProcessOptions();
250 
251 
252  if (IgnoreEventsWithNegWeightsInTraining()) {
253  Log() << kINFO
254  << "Will ignore negative events in training!"
255  << Endl;
256  }
257 
258 
259  if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
260  else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
261  else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
262 
263  if (fBpModeS == "sequential") fBPMode = kSequential;
264  else if (fBpModeS == "batch") fBPMode = kBatch;
265 
266  // InitializeLearningRates();
267 
268  if (fBPMode == kBatch) {
269  Data()->SetCurrentType(Types::kTraining);
270  Int_t numEvents = Data()->GetNEvents();
271  if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
272  }
273 }
274 
275 ////////////////////////////////////////////////////////////////////////////////
276 /// initialize learning rates of synapses, used only by back propagation
277 
278 void TMVA::MethodMLP::InitializeLearningRates()
279 {
280  Log() << kDEBUG << "Initialize learning rates" << Endl;
281  TSynapse *synapse;
282  Int_t numSynapses = fSynapses->GetEntriesFast();
283  for (Int_t i = 0; i < numSynapses; i++) {
284  synapse = (TSynapse*)fSynapses->At(i);
285  synapse->SetLearningRate(fLearnRate);
286  }
287 }
288 
289 ////////////////////////////////////////////////////////////////////////////////
290 /// calculate the estimator that training is attempting to minimize
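/// (conventions used in the loop below: for classification with the MSE
/// estimator the per-event deviation is d = (y - v)^2, for cross-entropy
/// d = -2*(y*log(v) + (1-y)*log(1-v)), where y is the desired output
/// (1 for signal, 0 for background) and v the activation of the output neuron;
/// the returned estimator is the weighted sum of the deviations divided by the
/// sum of event weights)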
291 
292 Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
293 {
294  // sanity check
295  if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
296  Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
297  }
298 
299  Types::ETreeType saveType = Data()->GetCurrentType();
300  Data()->SetCurrentType(treeType);
301 
302  // if epochs are counted create monitoring histograms (only available for classification)
303  TString type = (treeType == Types::kTraining ? "train" : "test");
304  TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
305  TString nameB = name + "_B";
306  TString nameS = name + "_S";
307  Int_t nbin = 100;
308  Float_t limit = 2;
309  TH1* histS = 0;
310  TH1* histB = 0;
311  if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
312  histS = new TH1F( nameS, nameS, nbin, -limit, limit );
313  histB = new TH1F( nameB, nameB, nbin, -limit, limit );
314  }
315 
316  Double_t estimator = 0;
317 
318  // loop over all training events
319  Int_t nEvents = GetNEvents();
320  UInt_t nClasses = DataInfo().GetNClasses();
321  UInt_t nTgts = DataInfo().GetNTargets();
322 
323 
324  Float_t sumOfWeights = 0.f;
325  if( fWeightRange < 1.f ){
326  fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
327  }
328 
329  for (Int_t i = 0; i < nEvents; i++) {
330 
331  const Event* ev = GetEvent(i);
332 
333  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
334  && (saveType == Types::kTraining)){
335  continue;
336  }
337 
338  Double_t w = ev->GetWeight();
339 
340  ForceNetworkInputs( ev );
341  ForceNetworkCalculations();
342 
343  Double_t d = 0, v = 0;
344  if (DoRegression()) {
345  for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
346  v = GetOutputNeuron( itgt )->GetActivationValue();
347  Double_t targetValue = ev->GetTarget( itgt );
348  Double_t dt = v - targetValue;
349  d += (dt*dt);
350  }
351  estimator += d*w;
352  } else if (DoMulticlass() ) {
353  UInt_t cls = ev->GetClass();
354  if (fEstimator==kCE){
355  Double_t norm(0);
356  for (UInt_t icls = 0; icls < nClasses; icls++) {
357  Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
358  norm += exp( activationValue );
359  if(icls==cls)
360  d = exp( activationValue );
361  }
362  d = -TMath::Log(d/norm);
363  }
364  else{
365  for (UInt_t icls = 0; icls < nClasses; icls++) {
366  Double_t desired = (icls==cls) ? 1.0 : 0.0;
367  v = GetOutputNeuron( icls )->GetActivationValue();
368  d = (desired-v)*(desired-v);
369  }
370  }
371  estimator += d*w; //zjh
372  } else {
373  Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
374  v = GetOutputNeuron()->GetActivationValue();
375  if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
376  else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
377  estimator += d*w; //zjh
378  }
379 
380  if( fDeviationsFromTargets )
381  fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
382 
383  sumOfWeights += w;
384 
385 
386  // fill monitoring histograms
387  if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
388  else if (histB != 0) histB->Fill( float(v), float(w) );
389  }
390 
391 
392  if( fDeviationsFromTargets ) {
393  std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
394 
395  Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
396  estimator = 0.f;
397 
398  Float_t weightRangeCut = fWeightRange*sumOfWeights;
399  Float_t weightSum = 0.f;
400  for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
401  float deviation = (*itDev).first;
402  float devWeight = (*itDev).second;
403  weightSum += devWeight; // add the weight of this event
404  if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
405  estimator += devWeight*deviation;
406  }
407  }
408 
409  sumOfWeights = sumOfWeightsInRange;
410  delete fDeviationsFromTargets;
411  }
412 
413  if (histS != 0) fEpochMonHistS.push_back( histS );
414  if (histB != 0) fEpochMonHistB.push_back( histB );
415 
416  //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
417  //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
418  //else estimator = estimator*0.5/Float_t(nEvents);
419  estimator = estimator/Float_t(sumOfWeights);
420 
421 
422  //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
423 
424  Data()->SetCurrentType( saveType );
425 
426  // provide epoch-wise monitoring
427  if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
428  CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
429  }
430 
431  return estimator;
432 }
433 
434 ////////////////////////////////////////////////////////////////////////////////
435 
436 void TMVA::MethodMLP::Train(Int_t nEpochs)
437 {
438  if (fNetwork == 0) {
439  //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
440  Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
441  SetAnalysisType(GetAnalysisType());
442  }
443  Log() << kDEBUG << "reinitialize learning rates" << Endl;
444  InitializeLearningRates();
445  Log() << kHEADER;
446  PrintMessage("Training Network");
447  Log() << Endl;
448  Int_t nEvents=GetNEvents();
449  Int_t nSynapses=fSynapses->GetEntriesFast();
450  if (nSynapses>nEvents)
451  Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
452 
453  fIPyMaxIter = nEpochs;
454  if (fInteractive && fInteractive->NotInitialized()){
455  std::vector<TString> titles = {"Error on training set", "Error on test set"};
456  fInteractive->Init(titles);
457  }
458 
459 #ifdef MethodMLP_UseMinuit__
460  if (useMinuit) MinuitMinimize();
461 #else
462  if (fTrainingMethod == kGA) GeneticMinimize();
463  else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
464  else BackPropagationMinimize(nEpochs);
465 #endif
466 
467  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
468  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
469  if (fUseRegulator){
470  Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
471  UpdateRegulators();
472  Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
473  }
474 
475  if( fCalculateErrors || fUseRegulator )
476  {
477  Int_t numSynapses=fSynapses->GetEntriesFast();
478  fInvHessian.ResizeTo(numSynapses,numSynapses);
479  GetApproxInvHessian( fInvHessian ,false);
480  }
481  ExitFromTraining();
482 }
483 
484 ////////////////////////////////////////////////////////////////////////////////
485 /// train network with BFGS algorithm
486 
487 void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
488 {
489  Timer timer( (fSteps>0?100:nEpochs), GetName() );
490 
491  // create histograms for overtraining monitoring
492  Int_t nbinTest = Int_t(nEpochs/fTestRate);
493  if(!IsSilentFile())
494  {
495  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
496  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
497  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
498  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
499  }
500 
501  Int_t nSynapses = fSynapses->GetEntriesFast();
502  Int_t nWeights = nSynapses;
503 
504  for (Int_t i=0;i<nSynapses;i++) {
505  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
506  synapse->SetDEDw(0.0);
507  }
508 
509  std::vector<Double_t> buffer( nWeights );
510  for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
511 
512  TMatrixD Dir ( nWeights, 1 );
513  TMatrixD Hessian ( nWeights, nWeights );
514  TMatrixD Gamma ( nWeights, 1 );
515  TMatrixD Delta ( nWeights, 1 );
516  Int_t RegUpdateCD=0; //zjh
517  Int_t RegUpdateTimes=0; //zjh
518  Double_t AccuError=0;
519 
520  Double_t trainE = -1;
521  Double_t testE = -1;
522 
523  fLastAlpha = 0.;
524 
525  if(fSamplingTraining || fSamplingTesting)
526  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
527 
528  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
529  timer.DrawProgressBar( 0 );
530 
531  // start training cycles (epochs)
532  for (Int_t i = 0; i < nEpochs; i++) {
533 
534  if (fExitFromTraining) break;
535  fIPyCurrentIter = i;
536  if (Float_t(i)/nEpochs < fSamplingEpoch) {
537  if ((i+1)%fTestRate == 0 || (i == 0)) {
538  if (fSamplingTraining) {
539  Data()->SetCurrentType( Types::kTraining );
540  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
541  Data()->CreateSampling();
542  }
543  if (fSamplingTesting) {
544  Data()->SetCurrentType( Types::kTesting );
545  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
546  Data()->CreateSampling();
547  }
548  }
549  }
550  else {
551  Data()->SetCurrentType( Types::kTraining );
552  Data()->InitSampling(1.0,1.0);
553  Data()->SetCurrentType( Types::kTesting );
554  Data()->InitSampling(1.0,1.0);
555  }
556  Data()->SetCurrentType( Types::kTraining );
557 
558  //zjh
559  if (fUseRegulator) {
560  UpdatePriors();
561  RegUpdateCD++;
562  }
563  //zjh
564 
565  SetGammaDelta( Gamma, Delta, buffer );
566 
567  if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
568  SteepestDir( Dir );
569  Hessian.UnitMatrix();
570  RegUpdateCD=0; //zjh
571  }
572  else {
573  if (GetHessian( Hessian, Gamma, Delta )) {
574  SteepestDir( Dir );
575  Hessian.UnitMatrix();
576  RegUpdateCD=0; //zjh
577  }
578  else SetDir( Hessian, Dir );
579  }
580 
581  Double_t dError=0; //zjh
582  if (DerivDir( Dir ) > 0) {
583  SteepestDir( Dir );
584  Hessian.UnitMatrix();
585  RegUpdateCD=0; //zjh
586  }
587  if (LineSearch( Dir, buffer, &dError )) { //zjh
588  Hessian.UnitMatrix();
589  SteepestDir( Dir );
590  RegUpdateCD=0; //zjh
591  if (LineSearch(Dir, buffer, &dError)) { //zjh
592  i = nEpochs;
593  Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
594  }
595  }
596 
597  //zjh+
598  if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
599  AccuError+=dError;
600 
601  if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
602  Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
603  UpdateRegulators();
604  Hessian.UnitMatrix();
605  RegUpdateCD=0;
606  RegUpdateTimes++;
607  AccuError=0;
608  }
609  //zjh-
610 
611  // monitor convergence of training and control sample
612  if ((i+1)%fTestRate == 0) {
613  //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
614  //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
615  trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
616  testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
617  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
618  if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
619  {
620  fEstimatorHistTrain->Fill( i+1, trainE );
621  fEstimatorHistTest ->Fill( i+1, testE );
622  }
623  Bool_t success = kFALSE;
624  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
625  success = kTRUE;
626  }
627  Data()->EventResult( success );
628 
629  SetCurrentValue( testE );
630  if (HasConverged()) {
631  if (Float_t(i)/nEpochs < fSamplingEpoch) {
632  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
633  i = newEpoch;
634  ResetConvergenceCounter();
635  }
636  else break;
637  }
638  }
639 
640  // draw progress
641  TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
642  if (fSteps > 0) {
643  Float_t progress = 0;
644  if (Float_t(i)/nEpochs < fSamplingEpoch)
645  // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
646  progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
647  else
648  {
649  // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
650  progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
651  }
652  Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
653  if (progress2>progress) progress=progress2; //zjh
654  timer.DrawProgressBar( Int_t(progress), convText );
655  }
656  else {
657  Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
658  if (progress<i) progress=i; //zjh
659  timer.DrawProgressBar( progress, convText ); //zjh
660  }
661 
662  // some verbose output
663  if (fgPRINT_SEQ) {
664  PrintNetwork();
665  WaitForKeyboard();
666  }
667  }
668 }
669 
670 ////////////////////////////////////////////////////////////////////////////////
671 
672 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
673 {
674  Int_t nWeights = fSynapses->GetEntriesFast();
675 
676  Int_t IDX = 0;
677  Int_t nSynapses = fSynapses->GetEntriesFast();
678  for (Int_t i=0;i<nSynapses;i++) {
679  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
680  Gamma[IDX++][0] = -synapse->GetDEDw();
681  }
682 
683  for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
684 
685  ComputeDEDw();
686 
687  IDX = 0;
688  for (Int_t i=0;i<nSynapses;i++)
689  {
690  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
691  Gamma[IDX++][0] += synapse->GetDEDw();
692  }
693 }
694 
695 ////////////////////////////////////////////////////////////////////////////////
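/// compute the derivative of the estimator with respect to each synapse weight,
/// summed over the accepted training events via SimulateEvent() and divided by
/// their number; if the regulator is active, the prior derivative fPriorDev is
/// added to each component before the normalisation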
696 
697 void TMVA::MethodMLP::ComputeDEDw()
698 {
699  Int_t nSynapses = fSynapses->GetEntriesFast();
700  for (Int_t i=0;i<nSynapses;i++) {
701  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
702  synapse->SetDEDw( 0.0 );
703  }
704 
705  Int_t nEvents = GetNEvents();
706  Int_t nPosEvents = nEvents;
707  for (Int_t i=0;i<nEvents;i++) {
708 
709  const Event* ev = GetEvent(i);
710  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
711  && (Data()->GetCurrentType() == Types::kTraining)){
712  --nPosEvents;
713  continue;
714  }
715 
716  SimulateEvent( ev );
717 
718  for (Int_t j=0;j<nSynapses;j++) {
719  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
720  synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
721  }
722  }
723 
724  for (Int_t i=0;i<nSynapses;i++) {
725  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
726  Double_t DEDw=synapse->GetDEDw(); //zjh
727  if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
728  synapse->SetDEDw( DEDw / nPosEvents ); //zjh
729  }
730 }
731 
732 ////////////////////////////////////////////////////////////////////////////////
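/// propagate a single event through the network and set the output-neuron
/// error(s) to the derivative of the chosen estimator with respect to the
/// output activation (times the event weight); the per-synapse derivatives are
/// then accumulated via CalculateNeuronDeltas() and TSynapse::CalculateDelta()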
733 
734 void TMVA::MethodMLP::SimulateEvent( const Event* ev )
735 {
736  Double_t eventWeight = ev->GetWeight();
737 
738  ForceNetworkInputs( ev );
739  ForceNetworkCalculations();
740 
741  if (DoRegression()) {
742  UInt_t ntgt = DataInfo().GetNTargets();
743  for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
744  Double_t desired = ev->GetTarget(itgt);
745  Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
746  GetOutputNeuron( itgt )->SetError(error);
747  }
748  } else if (DoMulticlass()) {
749  UInt_t nClasses = DataInfo().GetNClasses();
750  UInt_t cls = ev->GetClass();
751  for (UInt_t icls = 0; icls < nClasses; icls++) {
752  Double_t desired = ( cls==icls ? 1.0 : 0.0 );
753  Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
754  GetOutputNeuron( icls )->SetError(error);
755  }
756  } else {
757  Double_t desired = GetDesiredOutput( ev );
758  Double_t error=-1; //zjh
759  if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
760  else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
761  GetOutputNeuron()->SetError(error);
762  }
763 
764  CalculateNeuronDeltas();
765  for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
766  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
767  synapse->InitDelta();
768  synapse->CalculateDelta();
769  }
770 }
771 
772 ////////////////////////////////////////////////////////////////////////////////
773 
774 void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
775 {
776  Int_t IDX = 0;
777  Int_t nSynapses = fSynapses->GetEntriesFast();
778 
779  for (Int_t i=0;i<nSynapses;i++) {
780  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
781  Dir[IDX++][0] = -synapse->GetDEDw();
782  }
783 }
784 
785 ////////////////////////////////////////////////////////////////////////////////
786 
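/// BFGS update of the approximate inverse Hessian, as implemented below: with
/// Delta the last step in the weights and Gamma the corresponding change of the
/// gradient, the update reads
///   H <- H + (1 + Gamma^T H Gamma / Gamma^T Delta) * Delta Delta^T / (Gamma^T Delta)
///          - (Delta Gamma^T H + H Gamma Delta^T) / (Gamma^T Delta)
/// returns kTRUE (forcing a reset to steepest descent) if Gamma^T Delta vanishes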
787 Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
788 {
789  TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
790  if ((Double_t) gd[0][0] == 0.) return kTRUE;
791  TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
792  TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
793  TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
794  Double_t a = 1 / (Double_t) gd[0][0];
795  Double_t f = 1 + ((Double_t)gHg[0][0]*a);
796  TMatrixD res( TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed, Delta)) );
797  res *= f;
798  res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
799  TMatrixD(TMatrixD::kTransposed, Delta)));
800  res *= a;
801  Hessian += res;
802 
803  return kFALSE;
804 }
805 
806 ////////////////////////////////////////////////////////////////////////////////
807 
808 void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
809 {
810  Int_t IDX = 0;
811  Int_t nSynapses = fSynapses->GetEntriesFast();
812  TMatrixD DEDw(nSynapses, 1);
813 
814  for (Int_t i=0;i<nSynapses;i++) {
815  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
816  DEDw[IDX++][0] = synapse->GetDEDw();
817  }
818 
819  dir = Hessian * DEDw;
820  for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
821 }
822 
823 ////////////////////////////////////////////////////////////////////////////////
824 
825 Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
826 {
827  Int_t IDX = 0;
828  Int_t nSynapses = fSynapses->GetEntriesFast();
829  Double_t Result = 0.0;
830 
831  for (Int_t i=0;i<nSynapses;i++) {
832  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
833  Result += Dir[IDX++][0] * synapse->GetDEDw();
834  }
835  return Result;
836 }
837 
838 ////////////////////////////////////////////////////////////////////////////////
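/// one-dimensional minimisation along Dir: starting from the previous step size
/// fLastAlpha (clamped to [0.01, 2]) the step is scaled by fTau until the error
/// is bracketed by three points (alpha1, alpha2, alpha3); the new fLastAlpha is
/// then taken at the minimum of the parabola through these points. Returns
/// kTRUE if no decrease of the error could be found, kFALSE otherwise; on
/// success, buffer holds the applied weight changes and dError the relative
/// error reduction.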
839 
840 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
841 {
842  Int_t IDX = 0;
843  Int_t nSynapses = fSynapses->GetEntriesFast();
844  Int_t nWeights = nSynapses;
845 
846  std::vector<Double_t> Origin(nWeights);
847  for (Int_t i=0;i<nSynapses;i++) {
848  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
849  Origin[i] = synapse->GetWeight();
850  }
851 
852  Double_t err1 = GetError();
853  Double_t errOrigin=err1;//zjh
854  Double_t alpha1 = 0.;
855  Double_t alpha2 = fLastAlpha;
856 
857 
858  if (alpha2 < 0.01) alpha2 = 0.01;
859  else if (alpha2 > 2.0) alpha2 = 2.0;
860  Double_t alpha_original = alpha2;
861  Double_t alpha3 = alpha2;
862 
863  SetDirWeights( Origin, Dir, alpha2 );
864  Double_t err2 = GetError();
865  //Double_t err2 = err1;
866  Double_t err3 = err2;
867  Bool_t bingo = kFALSE;
868 
869 
870  if (err1 > err2) {
871  for (Int_t i=0;i<100;i++) {
872  alpha3 *= fTau;
873  SetDirWeights(Origin, Dir, alpha3);
874  err3 = GetError();
875  if (err3 > err2) {
876  bingo = kTRUE;
877  break;
878  }
879  alpha1 = alpha2;
880  err1 = err2;
881  alpha2 = alpha3;
882  err2 = err3;
883  }
884  if (!bingo) {
885  SetDirWeights(Origin, Dir, 0.);
886  return kTRUE;
887  }
888  }
889  else {
890  for (Int_t i=0;i<100;i++) {
891  alpha2 /= fTau;
892  if (i==50) {
893  Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
894  alpha2 = -alpha_original;
895  }
896  SetDirWeights(Origin, Dir, alpha2);
897  err2 = GetError();
898  if (err1 > err2) {
899  bingo = kTRUE;
900  break;
901  }
902  alpha3 = alpha2;
903  err3 = err2;
904  }
905  if (!bingo) {
906  SetDirWeights(Origin, Dir, 0.);
907  Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
908  fLastAlpha = 0.05;
909  return kTRUE;
910  }
911  }
912 
913  if (alpha1>0 && alpha2>0 && alpha3 > 0) {
914  fLastAlpha = 0.5 * (alpha1 + alpha3 -
915  (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
916  - ( err2 - err1 ) / (alpha2 - alpha1 )));
917  }
918  else {
919  fLastAlpha = alpha2;
920  }
921 
922  fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
923 
924  SetDirWeights(Origin, Dir, fLastAlpha);
925 
926  // leaving these lines uncommented is a heavy price to pay for only a warning message
927  // (which shouldn't appear anyway)
928  // --> about 15% of time is spent in the final GetError().
929  //
930  Double_t finalError = GetError();
931  if (finalError > err1) {
932  Log() << kWARNING << "Line search increased error! Something is wrong."
933  << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
934  << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
935  }
936 
937  for (Int_t i=0;i<nSynapses;i++) {
938  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
939  buffer[IDX] = synapse->GetWeight() - Origin[IDX];
940  IDX++;
941  }
942 
943  if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
944 
945  return kFALSE;
946 }
947 
948 ////////////////////////////////////////////////////////////////////////////////
949 
950 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
951 {
952  Int_t IDX = 0;
953  Int_t nSynapses = fSynapses->GetEntriesFast();
954 
955  for (Int_t i=0;i<nSynapses;i++) {
956  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
957  synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
958  IDX++;
959  }
960  if (fUseRegulator) UpdatePriors();//zjh
961 }
962 
963 
964 ////////////////////////////////////////////////////////////////////////////////
965 
966 Double_t TMVA::MethodMLP::GetError()
967 {
968  Int_t nEvents = GetNEvents();
969  UInt_t ntgts = GetNTargets();
970  Double_t Result = 0.;
971 
972  for (Int_t i=0;i<nEvents;i++) {
973  const Event* ev = GetEvent(i);
974 
975  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
976  && (Data()->GetCurrentType() == Types::kTraining)){
977  continue;
978  }
979  SimulateEvent( ev );
980 
981  Double_t error = 0.;
982  if (DoRegression()) {
983  for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
984  error += GetMSEErr( ev, itgt );//zjh
985  }
986  } else if ( DoMulticlass() ){
987  for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
988  error += GetMSEErr( ev, icls );
989  }
990  } else {
991  if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
992  else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
993  }
994  Result += error * ev->GetWeight();
995  }
996  if (fUseRegulator) Result+=fPrior; //zjh
997  if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
998  return Result;
999 }
1000 
1001 ////////////////////////////////////////////////////////////////////////////////
1002 
1003 Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
1004 {
1005  Double_t error = 0;
1006  Double_t output = GetOutputNeuron( index )->GetActivationValue();
1007  Double_t target = 0;
1008  if (DoRegression()) target = ev->GetTarget( index );
1009  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1010  else target = GetDesiredOutput( ev );
1011 
1012  error = 0.5*(output-target)*(output-target); //zjh
1013 
1014  return error;
1015 
1016 }
1017 
1018 ////////////////////////////////////////////////////////////////////////////////
1019 
1020 Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
1021 {
1022  Double_t error = 0;
1023  Double_t output = GetOutputNeuron( index )->GetActivationValue();
1024  Double_t target = 0;
1025  if (DoRegression()) target = ev->GetTarget( index );
1026  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1027  else target = GetDesiredOutput( ev );
1028 
1029  error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1030 
1031  return error;
1032 }
1033 
1034 ////////////////////////////////////////////////////////////////////////////////
1035 /// minimize estimator / train network with back propagation algorithm
1036 
1037 void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
1038 {
1039  // Timer timer( nEpochs, GetName() );
1040  Timer timer( (fSteps>0?100:nEpochs), GetName() );
1041  Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1042 
1043  // create histograms for overtraining monitoring
1044  Int_t nbinTest = Int_t(nEpochs/fTestRate);
1045  if(!IsSilentFile())
1046  {
1047  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1048  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1049  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1050  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1051  }
1052  if(fSamplingTraining || fSamplingTesting)
1053  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1054 
1055  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1056  timer.DrawProgressBar(0);
1057 
1058  // estimators
1059  Double_t trainE = -1;
1060  Double_t testE = -1;
1061 
1062  // start training cycles (epochs)
1063  for (Int_t i = 0; i < nEpochs; i++) {
1064 
1065  if (fExitFromTraining) break;
1066  fIPyCurrentIter = i;
1067  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1068  if ((i+1)%fTestRate == 0 || (i == 0)) {
1069  if (fSamplingTraining) {
1070  Data()->SetCurrentType( Types::kTraining );
1071  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1072  Data()->CreateSampling();
1073  }
1074  if (fSamplingTesting) {
1075  Data()->SetCurrentType( Types::kTesting );
1076  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1077  Data()->CreateSampling();
1078  }
1079  }
1080  }
1081  else {
1082  Data()->SetCurrentType( Types::kTraining );
1083  Data()->InitSampling(1.0,1.0);
1084  Data()->SetCurrentType( Types::kTesting );
1085  Data()->InitSampling(1.0,1.0);
1086  }
1087  Data()->SetCurrentType( Types::kTraining );
1088 
1089  TrainOneEpoch();
1090  DecaySynapseWeights(i >= lateEpoch);
1091 
1092  // monitor convergence of training and control sample
1093  if ((i+1)%fTestRate == 0) {
1094  trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1095  testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1096  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1097  if(!IsSilentFile())
1098  {
1099  fEstimatorHistTrain->Fill( i+1, trainE );
1100  fEstimatorHistTest ->Fill( i+1, testE );
1101  }
1102  Bool_t success = kFALSE;
1103  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1104  success = kTRUE;
1105  }
1106  Data()->EventResult( success );
1107 
1108  SetCurrentValue( testE );
1109  if (HasConverged()) {
1110  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1111  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1112  i = newEpoch;
1113  ResetConvergenceCounter();
1114  }
1115  else {
1116  if (lateEpoch > i) lateEpoch = i;
1117  else break;
1118  }
1119  }
1120  }
1121 
1122  // draw progress bar (add convergence value)
1123  TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1124  if (fSteps > 0) {
1125  Float_t progress = 0;
1126  if (Float_t(i)/nEpochs < fSamplingEpoch)
1127  progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1128  else
1129  progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1130 
1131  timer.DrawProgressBar( Int_t(progress), convText );
1132  }
1133  else {
1134  timer.DrawProgressBar( i, convText );
1135  }
1136  }
1137 }
1138 
1139 ////////////////////////////////////////////////////////////////////////////////
1140 /// train network over a single epoch/cycle of events
1141 
1142 void TMVA::MethodMLP::TrainOneEpoch()
1143 {
1144  Int_t nEvents = Data()->GetNEvents();
1145 
1146  // randomize the order events will be presented, important for sequential mode
1147  Int_t* index = new Int_t[nEvents];
1148  for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1149  Shuffle(index, nEvents);
1150 
1151  // loop over all training events
1152  for (Int_t i = 0; i < nEvents; i++) {
1153 
1154  const Event * ev = GetEvent(index[i]);
1155  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1156  && (Data()->GetCurrentType() == Types::kTraining)){
1157  continue;
1158  }
1159 
1160  TrainOneEvent(index[i]);
1161 
1162  // do adjustments if in batch mode
1163  if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1164  AdjustSynapseWeights();
1165  if (fgPRINT_BATCH) {
1166  PrintNetwork();
1167  WaitForKeyboard();
1168  }
1169  }
1170 
1171  // debug in sequential mode
1172  if (fgPRINT_SEQ) {
1173  PrintNetwork();
1174  WaitForKeyboard();
1175  }
1176  }
1177 
1178  delete[] index;
1179 }
1180 
1181 ////////////////////////////////////////////////////////////////////////////////
1182 /// Input:
1183 /// - index: the array to shuffle
1184 /// - n: the size of the array
1185 /// Output:
1186 /// - index: the shuffled indexes
1187 ///
1188 /// This method is used for sequential training
1189 
1190 void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
1191 {
1192  Int_t j, k;
1193  Int_t a = n - 1;
1194  for (Int_t i = 0; i < n; i++) {
1195  j = (Int_t) (frgen->Rndm() * a);
1196  if (j<n){ // address the 'worries' of coverity
1197  k = index[j];
1198  index[j] = index[i];
1199  index[i] = k;
1200  }
1201  }
1202 }
1203 
1204 ////////////////////////////////////////////////////////////////////////////////
1205 /// decay synapse weights
1206 /// in last 10 epochs, lower learning rate even more to find a good minimum
1207 
1208 void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
1209 {
1210  TSynapse* synapse;
1211  Int_t numSynapses = fSynapses->GetEntriesFast();
1212  for (Int_t i = 0; i < numSynapses; i++) {
1213  synapse = (TSynapse*)fSynapses->At(i);
1214  if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1215  else synapse->DecayLearningRate(fDecayRate);
1216  }
1217 }
1218 
1219 ////////////////////////////////////////////////////////////////////////////////
1220 /// fast per-event training
1221 
1222 void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
1223 {
1224  GetEvent(ievt);
1225 
1226  // as soon as we know how to get event weights, get that here
1227 
1228  // note: the normalization of event weights will affect the choice
1229  // of learning rate, one will have to experiment to get the right value.
1230  // in general, if the "average" event weight is 1, the learning rate
1231  // should be good if set around 0.02 (a good value if all event weights are 1)
1232  Double_t eventWeight = 1.0;
1233 
1234  // get the desired output of this event
1235  Double_t desired;
1236  if (type == 0) desired = fOutput->GetMin(); // background //zjh
1237  else desired = fOutput->GetMax(); // signal //zjh
1238 
1239  // force the value for each input neuron
1240  Double_t x;
1241  TNeuron* neuron;
1242 
1243  for (UInt_t j = 0; j < GetNvar(); j++) {
1244  x = branchVar[j];
1245  if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1246  neuron = GetInputNeuron(j);
1247  neuron->ForceValue(x);
1248  }
1249 
1250  ForceNetworkCalculations();
1251  UpdateNetwork(desired, eventWeight);
1252 }
1253 
1254 ////////////////////////////////////////////////////////////////////////////////
1255 /// train network over a single event
1256 /// this uses the new event model
1257 
1258 void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
1259 {
1260  // note: the normalization of event weights will affect the choice
1261  // of learning rate, one will have to experiment to get the right value.
1262  // in general, if the "average" event weight is 1, the learning rate
1263  // should be good if set around 0.02 (a good value if all event weights are 1)
1264 
1265  const Event * ev = GetEvent(ievt);
1266  Double_t eventWeight = ev->GetWeight();
1267  ForceNetworkInputs( ev );
1268  ForceNetworkCalculations();
1269  if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1270  if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1271  else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1272 }
1273 
1274 ////////////////////////////////////////////////////////////////////////////////
1275 /// get the desired output of this event
1276 
1277 Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
1278 {
1279  return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1280 }
1281 
1282 ////////////////////////////////////////////////////////////////////////////////
1283 /// update the network based on how closely
1284 /// the output matched the desired output
1285 
1286 void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
1287 {
1288  Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1289  if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1290  else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1291  else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1292  error *= eventWeight;
1293  GetOutputNeuron()->SetError(error);
1294  CalculateNeuronDeltas();
1295  UpdateSynapses();
1296 }
1297 
1298 ////////////////////////////////////////////////////////////////////////////////
1299 /// update the network based on how closely
1300 /// the output matched the desired output
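/// (multiclass version): a softmax is applied to the output activations,
/// p_i = exp(a_i) / sum_j exp(a_j), and the error assigned to output neuron i
/// is (p_i - desired_i) * eventWeight, i.e. the gradient of the cross-entropy
/// loss with respect to the activation a_i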
1301 
1302 void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1303 {
1304  // Norm for softmax
1305  Double_t norm = 0.;
1306  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1307  Double_t act = GetOutputNeuron(i)->GetActivationValue();
1308  norm += TMath::Exp(act);
1309  }
1310 
1311  // Get output of network, and apply softmax
1312  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1313  Double_t act = GetOutputNeuron(i)->GetActivationValue();
1314  Double_t output = TMath::Exp(act) / norm;
1315  Double_t error = output - desired.at(i);
1316  error *= eventWeight;
1317  GetOutputNeuron(i)->SetError(error);
1318  }
1319 
1320  // Do backpropagation
1321  CalculateNeuronDeltas();
1322  UpdateSynapses();
1323 }
1324 
1325 ////////////////////////////////////////////////////////////////////////////////
1326 /// have each neuron calculate its delta by back propagation
1327 
1328 void TMVA::MethodMLP::CalculateNeuronDeltas()
1329 {
1330  TNeuron* neuron;
1331  Int_t numNeurons;
1332  Int_t numLayers = fNetwork->GetEntriesFast();
1333  TObjArray* curLayer;
1334 
1335  // step backwards through the network (back propagation)
1336  // deltas calculated starting at output layer
1337  for (Int_t i = numLayers-1; i >= 0; i--) {
1338  curLayer = (TObjArray*)fNetwork->At(i);
1339  numNeurons = curLayer->GetEntriesFast();
1340 
1341  for (Int_t j = 0; j < numNeurons; j++) {
1342  neuron = (TNeuron*) curLayer->At(j);
1343  neuron->CalculateDelta();
1344  }
1345  }
1346 }
1347 
1348 ////////////////////////////////////////////////////////////////////////////////
1349 /// create genetics class similar to GeneticCut
1350 /// give it vector of parameter ranges (parameters = weights)
1351 /// link fitness function of this class to ComputeEstimator
1352 /// instantiate GA (see MethodCuts)
1353 /// run it
1354 /// then this should exist for GA, Minuit and random sampling
1355 
1356 void TMVA::MethodMLP::GeneticMinimize()
1357 {
1358  PrintMessage("Minimizing Estimator with GA");
1359 
1360  // define GA parameters
1361  fGA_preCalc = 1;
1362  fGA_SC_steps = 10;
1363  fGA_SC_rate = 5;
1364  fGA_SC_factor = 0.95;
1365  fGA_nsteps = 30;
1366 
1367  // ranges
1368  std::vector<Interval*> ranges;
1369 
1370  Int_t numWeights = fSynapses->GetEntriesFast();
1371  for (Int_t ivar=0; ivar< numWeights; ivar++) {
1372  ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1373  }
1374 
1375  FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1376  gf->Run();
1377 
1378  Double_t estimator = CalculateEstimator();
1379  Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1380 }
1381 
1382 ////////////////////////////////////////////////////////////////////////////////
1383 /// interface to the estimate
1384 
1385 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1386 {
1387  return ComputeEstimator( parameters );
1388 }
1389 
1390 ////////////////////////////////////////////////////////////////////////////////
1391 /// this function is called by GeneticANN for GA optimization
1392 
1393 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1394 {
1395  TSynapse* synapse;
1396  Int_t numSynapses = fSynapses->GetEntriesFast();
1397 
1398  for (Int_t i = 0; i < numSynapses; i++) {
1399  synapse = (TSynapse*)fSynapses->At(i);
1400  synapse->SetWeight(parameters.at(i));
1401  }
1402  if (fUseRegulator) UpdatePriors(); //zjh
1403 
1404  Double_t estimator = CalculateEstimator();
1405 
1406  return estimator;
1407 }
1408 
1409 ////////////////////////////////////////////////////////////////////////////////
1410 /// update synapse error fields and adjust the weights (if in sequential mode)
1411 
1412 void TMVA::MethodMLP::UpdateSynapses()
1413 {
1414  TNeuron* neuron;
1415  Int_t numNeurons;
1416  TObjArray* curLayer;
1417  Int_t numLayers = fNetwork->GetEntriesFast();
1418 
1419  for (Int_t i = 0; i < numLayers; i++) {
1420  curLayer = (TObjArray*)fNetwork->At(i);
1421  numNeurons = curLayer->GetEntriesFast();
1422 
1423  for (Int_t j = 0; j < numNeurons; j++) {
1424  neuron = (TNeuron*) curLayer->At(j);
1425  if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1426  else neuron->UpdateSynapsesSequential();
1427  }
1428  }
1429 }
1430 
1431 ////////////////////////////////////////////////////////////////////////////////
1432 /// just adjust the synapse weights (should be called in batch mode)
1433 
1434 void TMVA::MethodMLP::AdjustSynapseWeights()
1435 {
1436  TNeuron* neuron;
1437  Int_t numNeurons;
1438  TObjArray* curLayer;
1439  Int_t numLayers = fNetwork->GetEntriesFast();
1440 
1441  for (Int_t i = numLayers-1; i >= 0; i--) {
1442  curLayer = (TObjArray*)fNetwork->At(i);
1443  numNeurons = curLayer->GetEntriesFast();
1444 
1445  for (Int_t j = 0; j < numNeurons; j++) {
1446  neuron = (TNeuron*) curLayer->At(j);
1447  neuron->AdjustSynapseWeights();
1448  }
1449  }
1450 }
1451 
1452 ////////////////////////////////////////////////////////////////////////////////
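/// update the regulator (weight-decay) prior: fPrior = 0.5 * sum_i
/// alpha_{c(i)} * w_i^2 over all synapse weights w_i, where alpha_{c(i)} is the
/// regulator of the weight class assigned to synapse i; fPriorDev collects the
/// derivatives alpha_{c(i)} * w_i used in ComputeDEDw()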
1453 
1454 void TMVA::MethodMLP::UpdatePriors()
1455 {
1456  fPrior=0;
1457  fPriorDev.clear();
1458  Int_t nSynapses = fSynapses->GetEntriesFast();
1459  for (Int_t i=0;i<nSynapses;i++) {
1460  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1461  fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1462  fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1463  }
1464 }
1465 
1466 ////////////////////////////////////////////////////////////////////////////////
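/// re-estimate the regulator strengths from the current weights and the
/// approximate inverse Hessian (an evidence-style update, as coded below): for
/// each weight class, alpha = variance * N / (sum of squared weights +
/// variance * trace of the corresponding diagonal block of the inverse
/// Hessian), where for the MSE estimator the noise variance is estimated from
/// the training error and the effective number of parameters gamma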
1467 
1468 void TMVA::MethodMLP::UpdateRegulators()
1469 {
1470  TMatrixD InvH(0,0);
1471  GetApproxInvHessian(InvH);
1472  Int_t numSynapses=fSynapses->GetEntriesFast();
1473  Int_t numRegulators=fRegulators.size();
1474  Float_t gamma=0,
1475  variance=1.; // Gaussian noise
1476  std::vector<Int_t> nWDP(numRegulators);
1477  std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1478  for (int i=0;i<numSynapses;i++) {
1479  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1480  Int_t idx=fRegulatorIdx[i];
1481  nWDP[idx]++;
1482  trace[idx]+=InvH[i][i];
1483  gamma+=1-fRegulators[idx]*InvH[i][i];
1484  weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1485  }
1486  if (fEstimator==kMSE) {
1487  if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1488  else variance=CalculateEstimator( Types::kTraining, 0 );
1489  }
1490 
1491  //Log() << kDEBUG << Endl;
1492  for (int i=0;i<numRegulators;i++)
1493  {
1494  //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1495  fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1496  if (fRegulators[i]<0) fRegulators[i]=0;
1497  Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1498  }
1499  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1500  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1501 
1502  Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1503 
1504 }
1505 
1506 ////////////////////////////////////////////////////////////////////////////////
1507 
1508 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1509 {
1510  Int_t numSynapses=fSynapses->GetEntriesFast();
1511  InvHessian.ResizeTo( numSynapses, numSynapses );
1512  InvHessian=0;
1513  TMatrixD sens(numSynapses,1);
1514  TMatrixD sensT(1,numSynapses);
1515  Int_t nEvents = GetNEvents();
1516  for (Int_t i=0;i<nEvents;i++) {
1517  GetEvent(i);
1518  double outputValue=GetMvaValue(); // force calculation
1519  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1520  CalculateNeuronDeltas();
1521  for (Int_t j = 0; j < numSynapses; j++){
1522  TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1523  synapses->InitDelta();
1524  synapses->CalculateDelta();
1525  sens[j][0]=sensT[0][j]=synapses->GetDelta();
1526  }
1527  if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1528  else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1529  }
1530 
1531  // TVectorD eValue(numSynapses);
1532  if (regulate) {
1533  for (Int_t i = 0; i < numSynapses; i++){
1534  InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1535  }
1536  }
1537  else {
1538  for (Int_t i = 0; i < numSynapses; i++){
1539  InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1540  }
1541  }
1542 
1543  InvHessian.Invert();
1544 
1545 }
1546 
1547 ////////////////////////////////////////////////////////////////////////////////
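/// return the MVA value; if error calculation was requested, the uncertainty is
/// propagated from the weights with sigma^2 = s^T * fInvHessian * s, where s is
/// the vector of derivatives of the raw network output with respect to the
/// synapse weights, and errUpper/errLower are set from the output function
/// evaluated at median +/- sigma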
1548 
1549 Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
1550 {
1551  Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1552 
1553  // no hessian (old training file) or no error requested
1554  if (!fCalculateErrors || errLower==0 || errUpper==0)
1555  return MvaValue;
1556 
1557  Double_t MvaUpper,MvaLower,median,variance;
1558  Int_t numSynapses=fSynapses->GetEntriesFast();
1559  if (fInvHessian.GetNcols()!=numSynapses) {
1560  Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1561  }
1562  TMatrixD sens(numSynapses,1);
1563  TMatrixD sensT(1,numSynapses);
1564  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1565  //GetOutputNeuron()->SetError(1.);
1566  CalculateNeuronDeltas();
1567  for (Int_t i = 0; i < numSynapses; i++){
1568  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1569  synapses->InitDelta();
1570  synapses->CalculateDelta();
1571  sensT[0][i]=synapses->GetDelta();
1572  }
1573  sens.Transpose(sensT);
1574  TMatrixD sig=sensT*fInvHessian*sens;
1575  variance=sig[0][0];
1576  median=GetOutputNeuron()->GetValue();
1577 
1578  if (variance<0) {
1579  Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1580  variance=0;
1581  }
1582  variance=sqrt(variance);
1583 
1584  //upper
1585  MvaUpper=fOutput->Eval(median+variance);
1586  if(errUpper)
1587  *errUpper=MvaUpper-MvaValue;
1588 
1589  //lower
1590  MvaLower=fOutput->Eval(median-variance);
1591  if(errLower)
1592  *errLower=MvaValue-MvaLower;
1593 
1594  return MvaValue;
1595 }
1596 
1597 
1598 #ifdef MethodMLP_UseMinuit__
1599 
1600 ////////////////////////////////////////////////////////////////////////////////
1601 /// minimize using Minuit
1602 
1603 void TMVA::MethodMLP::MinuitMinimize()
1604 {
1605  fNumberOfWeights = fSynapses->GetEntriesFast();
1606 
1607  TFitter* tfitter = new TFitter( fNumberOfWeights );
1608 
1609  // minuit-specific settings
1610  Double_t args[10];
1611 
1612  // output level
1613  args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1614  tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1615  tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1616 
1617  double w[54];
1618 
1619  // init parameters
1620  for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1621  TString parName = Form("w%i", ipar);
1622  tfitter->SetParameter( ipar,
1623  parName, w[ipar], 0.1, 0, 0 );
1624  }
1625 
1626  // define the CFN function
1627  tfitter->SetFCN( &IFCN );
1628 
1629  // define fit strategy
1630  args[0] = 2;
1631  tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1632 
1633  // now do the fit !
1634  args[0] = 1e-04;
1635  tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1636 
1637  Bool_t doBetter = kFALSE;
1638  Bool_t doEvenBetter = kFALSE;
1639  if (doBetter) {
1640  args[0] = 1e-04;
1641  tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1642 
1643  if (doEvenBetter) {
1644  args[0] = 500;
1645  tfitter->ExecuteCommand( "MINOS", args, 1 );
1646  }
1647  }
1648 }
1649 
1650 ////////////////////////////////////////////////////////////////////////////////
1651 /// Evaluate the minimisation function
1652 ///
1653 /// Input parameters:
1654 /// - npars: number of currently variable parameters
1655 /// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1656 /// - fitPars: array of (constant and variable) parameters
1657 /// - iflag: indicates what is to be calculated (see example below)
1658 /// - grad: array of gradients
1659 ///
1660 /// Output parameters:
1661 /// - f: the calculated function value.
1662 /// - grad: the (optional) vector of first derivatives).
1663 
1664 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1665 {
1666  ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1667 }
1668 
1669 TTHREAD_TLS(Int_t) nc = 0;
1670 TTHREAD_TLS(double) minf = 1000000;
1671 
1672 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1673 {
1674  // first update the weights
1675  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1676  TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1677  synapse->SetWeight(fitPars[ipar]);
1678  }
1679 
1680  // now compute the estimator
1681  f = CalculateEstimator();
1682 
1683  nc++;
1684  if (f < minf) minf = f;
1685  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1686  Log() << kDEBUG << Endl;
1687  Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1688 }
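CalculateEstimator returns the global training error for the weights that Minuit proposes; its exact form depends on the configured estimator (mean squared error or cross entropy) and on the event weights. Purely as an illustration, assuming the simplest sum-of-squares configuration with a single output and unit event weights, the quantity being minimised looks like

   E(\mathbf{w}) \;=\; \frac{1}{N}\sum_{a=1}^{N} \tfrac{1}{2}\bigl(y(\mathbf{x}_a;\mathbf{w}) - \hat y_a\bigr)^2,

where y(\mathbf{x}_a;\mathbf{w}) is the network response for event a and \hat y_a the desired output (typically 1 for signal and 0 for background).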
1689 
1690 ////////////////////////////////////////////////////////////////////////////////
1691 /// global "this" pointer to be used in minuit
1692 
1693 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1694 {
1695  return fgThis;
1696 }
1697 
1698 #endif
1699 
1700 
1701 ////////////////////////////////////////////////////////////////////////////////
1702 /// write specific classifier response
1703 
1704 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1705 {
1706  MethodANNBase::MakeClassSpecific(fout, className);
1707 }
1708 
1709 ////////////////////////////////////////////////////////////////////////////////
1710 /// get help message text
1711 ///
1712 /// typical length of text line:
1713 /// "|--------------------------------------------------------------|"
1714 
1715 void TMVA::MethodMLP::GetHelpMessage() const
1716 {
1717  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1718  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1719 
1720  Log() << Endl;
1721  Log() << col << "--- Short description:" << colres << Endl;
1722  Log() << Endl;
1723  Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1724  Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1725  Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1726  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1727  Log() << "signal and one background). " << Endl;
1728  Log() << Endl;
1729  Log() << col << "--- Performance optimisation:" << colres << Endl;
1730  Log() << Endl;
1731  Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1732  Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1733  Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1734  Log() << "number of input variables that have only little discrimination power. " << Endl;
1735  Log() << "" << Endl;
1736  Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1737  Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1738  Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1739  Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1740  Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1741  Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1742  Log() << "competitive results)." << Endl;
1743  Log() << Endl;
1744  Log() << col << "Overtraining: " << colres
1745  << "only the TMlpANN performs an explicit separation of the" << Endl;
1746  Log() << "full training sample into independent training and validation samples." << Endl;
1747  Log() << "We have found that in most high-energy physics applications the " << Endl;
1748  Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1749  Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1750  Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1751  Log() << "the use of validation samples would only reduce the available training" << Endl;
1752  Log() << "information. However, if the performance on the training sample is " << Endl;
1753  Log() << "found to be significantly better than the one found with the inde-" << Endl;
1754  Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1755  Log() << "are printed to standard output at the end of each training job." << Endl;
1756  Log() << Endl;
1757  Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1758  Log() << Endl;
1759  Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1760  Log() << "\"HiddenLayers=N+1,N,...\", where the first hidden layer has N+1" << Endl;
1761  Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1762  Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1763  Log() << "in favour of more neurons in the first hidden layer." << Endl;
1764  Log() << "" << Endl;
1765  Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1766  Log() << "adjustable weights is small compared to the training sample size," << Endl;
1767  Log() << "using a large number of training cycles should not lead to overtraining." << Endl;
1768 }
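The options referred to in this help text are passed as a TMVA option string when the method is booked. A minimal sketch, assuming a configured TMVA::Factory and TMVA::DataLoader as in the standard TMVAClassification tutorial; the option values are illustrative examples, not defaults taken from this class:

#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

// Book an MLP with an explicit hidden-layer architecture and cycle count.
// "factory" and "dataloader" are assumed to have been created and configured
// beforehand (data sources, variables, training/test split).
void BookExampleMLP(TMVA::Factory* factory, TMVA::DataLoader* dataloader)
{
   factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP",
                        "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:"
                        "HiddenLayers=N+5:TestRate=5:!UseRegulator" );
}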
1769 