MethodMLP.cxx
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 //_______________________________________________________________________
39 //
40 // Multilayer Perceptron class built off of MethodANNBase
41 //_______________________________________________________________________
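//
// A minimal booking sketch for this method (illustrative only, not part of this
// file; the exact Factory/DataLoader API depends on the ROOT version, and the
// input file, tree and variable names below are hypothetical). It requires the
// usual headers (TFile.h, TTree.h, TMVA/Factory.h, TMVA/DataLoader.h):
//
// \code
// TFile* input  = TFile::Open("input.root");                 // hypothetical input
// TFile* output = TFile::Open("TMVA_MLP.root", "RECREATE");
//
// TMVA::Factory    factory("TMVAClassification", output, "!V:AnalysisType=Classification");
// TMVA::DataLoader loader("dataset");
// loader.AddVariable("var1", 'F');                           // hypothetical variables
// loader.AddVariable("var2", 'F');
// loader.AddSignalTree    ((TTree*)input->Get("TreeS"));     // hypothetical trees
// loader.AddBackgroundTree((TTree*)input->Get("TreeB"));
// loader.PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents");
//
// factory.BookMethod(&loader, TMVA::Types::kMLP, "MLP",
//    "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:"
//    "HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator");
// factory.TrainAllMethods();
// factory.TestAllMethods();
// factory.EvaluateAllMethods();
// \endcode
//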
42 
43 #include "TMVA/MethodMLP.h"
44 
45 #include "TMVA/Config.h"
46 #include "TMVA/Configurable.h"
47 #include "TMVA/ConvergenceTest.h"
48 #include "TMVA/ClassifierFactory.h"
49 #include "TMVA/DataSet.h"
50 #include "TMVA/DataSetInfo.h"
51 #include "TMVA/FitterBase.h"
52 #include "TMVA/GeneticFitter.h"
53 #include "TMVA/IFitterTarget.h"
54 #include "TMVA/IMethod.h"
55 #include "TMVA/Interval.h"
56 #include "TMVA/MethodANNBase.h"
57 #include "TMVA/MsgLogger.h"
58 #include "TMVA/TNeuron.h"
59 #include "TMVA/TSynapse.h"
60 #include "TMVA/Timer.h"
61 #include "TMVA/Tools.h"
62 #include "TMVA/Types.h"
63 
64 #include "TH1.h"
65 #include "TString.h"
66 #include "TTree.h"
67 #include "Riostream.h"
68 #include "TFitter.h"
69 #include "TMatrixD.h"
70 #include "TMath.h"
71 #include "TFile.h"
72 
73 #include <cmath>
74 #include <vector>
75 
76 #ifdef MethodMLP_UseMinuit__
77 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
78 Bool_t MethodMLP_UseMinuit = kTRUE;
79 #endif
80 
81 REGISTER_METHOD(MLP)
82 
84 
85  using std::vector;
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// standard constructor
89 
90 TMVA::MethodMLP::MethodMLP( const TString& jobName,
 91  const TString& methodTitle,
92  DataSetInfo& theData,
93  const TString& theOption)
94  : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
95  fUseRegulator(false), fCalculateErrors(false),
96  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
97  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
98  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
99  fSamplingTraining(false), fSamplingTesting(false),
100  fLastAlpha(0.0), fTau(0.),
101  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
102  fBPMode(kSequential), fBpModeS("None"),
103  fBatchSize(0), fTestRate(0), fEpochMon(false),
104  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
105  fGA_SC_rate(0), fGA_SC_factor(0.0),
106  fDeviationsFromTargets(0),
107  fWeightRange (1.0)
108 {
109 
110 }
111 
112 ////////////////////////////////////////////////////////////////////////////////
113 /// constructor from a weight file
114 
115 TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
 116  const TString& theWeightFile)
117  : MethodANNBase( Types::kMLP, theData, theWeightFile),
118  fUseRegulator(false), fCalculateErrors(false),
119  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
120  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
 121  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
 122  fSamplingTraining(false), fSamplingTesting(false),
123  fLastAlpha(0.0), fTau(0.),
124  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
125  fBPMode(kSequential), fBpModeS("None"),
126  fBatchSize(0), fTestRate(0), fEpochMon(false),
127  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
 128  fGA_SC_rate(0), fGA_SC_factor(0.0),
129  fDeviationsFromTargets(0),
 130  fWeightRange (1.0)
131 {
132 }
133 
134 ////////////////////////////////////////////////////////////////////////////////
135 /// destructor
136 /// nothing to be done
137 
138 TMVA::MethodMLP::~MethodMLP()
 139 {
140 }
141 
142 void TMVA::MethodMLP::Train()
 143 {
144  Train(NumCycles());
145 }
146 
147 
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 /// MLP can handle classification with 2 classes and regression with one regression-target
151 
152 Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
 153 {
154  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
155  if (type == Types::kMulticlass ) return kTRUE;
156  if (type == Types::kRegression ) return kTRUE;
157 
158  return kFALSE;
159 }
160 
161 ////////////////////////////////////////////////////////////////////////////////
162 /// default initializations
163 
164 void TMVA::MethodMLP::Init()
 165 {
166  // the minimum requirement to declare an event signal-like
167  SetSignalReferenceCut( 0.5 );
168 #ifdef MethodMLP_UseMinuit__
169  fgThis = this;
170 #endif
171 }
172 
173 ////////////////////////////////////////////////////////////////////////////////
174 /// define the options (their key words) that can be set in the option string
175 /// known options (an illustrative option string is sketched after this method):
176 /// TrainingMethod <string> Training method
177 /// available values are: BP Back-Propagation <default>
178 /// GA Genetic Algorithm (takes a LONG time)
179 ///
180 /// LearningRate <float> NN learning rate parameter
181 /// DecayRate <float> Decay rate for learning parameter
182 /// TestRate <int> Test for overtraining performed at each #th epoch
183 ///
184 /// BPMode <string> Back-propagation learning mode
185 /// available values are: sequential <default>
186 /// batch
187 ///
188 /// BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
189 /// -1 for BatchSize=number_of_events
190 
191 void TMVA::MethodMLP::DeclareOptions()
 192 {
193  DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
194  "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
195  AddPreDefVal(TString("BP"));
196  AddPreDefVal(TString("GA"));
197  AddPreDefVal(TString("BFGS"));
198 
199  DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
200  DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
201  DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
202  DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
203 
204  DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
205  DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
206  DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance","The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
207 
208  DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
209  DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
210 
211  DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
212  DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
213 
214  DeclareOptionRef(fBpModeS="sequential", "BPMode",
215  "Back-propagation learning mode: sequential or batch");
216  AddPreDefVal(TString("sequential"));
217  AddPreDefVal(TString("batch"));
218 
219  DeclareOptionRef(fBatchSize=-1, "BatchSize",
220  "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
221 
222  DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
223  "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
224 
225  DeclareOptionRef(fSteps=-1, "ConvergenceTests",
226  "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
227 
228  DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
229  "Use regulator to avoid over-training"); //zjh
230  DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
231  "Maximum times of regulator update"); //zjh
232  DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
233  "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
234 
235  DeclareOptionRef(fWeightRange=1.0, "WeightRange",
236  "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
237 
238 }
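// As an illustration only (assumed, not taken from this file): the MLP-specific
// options declared above can be combined into a configuration string such as
//
// \code
// // hypothetical option string passed as the last argument of Factory::BookMethod
// TString mlpOptions("TrainingMethod=BP:LearningRate=0.02:DecayRate=0.01:"
//                    "BPMode=batch:BatchSize=-1:TestRate=5:ConvergenceTests=15");
// \endcode
//
// Architecture-related options such as HiddenLayers, NeuronType and NCycles are
// declared in the base class MethodANNBase.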
239 
240 
241 ////////////////////////////////////////////////////////////////////////////////
242 /// process user options
243 
244 void TMVA::MethodMLP::ProcessOptions()
 245 {
 246  MethodANNBase::ProcessOptions();
 247 
 248 
 249  if (IgnoreEventsWithNegWeightsInTraining()) {
 250  Log() << kINFO
251  << "Will ignore negative events in training!"
252  << Endl;
253  }
254 
255 
256  if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
257  else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
258  else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
259 
260  if (fBpModeS == "sequential") fBPMode = kSequential;
261  else if (fBpModeS == "batch") fBPMode = kBatch;
262 
263  // InitializeLearningRates();
264 
265  if (fBPMode == kBatch) {
267  Int_t numEvents = Data()->GetNEvents();
268  if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
269  }
270 }
271 
272 ////////////////////////////////////////////////////////////////////////////////
273 /// initialize learning rates of synapses, used only by backpropagation
274 
275 void TMVA::MethodMLP::InitializeLearningRates()
 276 {
277  Log() << kDEBUG << "Initialize learning rates" << Endl;
278  TSynapse *synapse;
279  Int_t numSynapses = fSynapses->GetEntriesFast();
280  for (Int_t i = 0; i < numSynapses; i++) {
281  synapse = (TSynapse*)fSynapses->At(i);
282  synapse->SetLearningRate(fLearnRate);
283  }
284 }
285 
286 ////////////////////////////////////////////////////////////////////////////////
287 /// calculate the estimator that training is attempting to minimize
288 
289 Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
 290 {
291  // sanity check
292  if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
293  Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
294  }
295 
296  Types::ETreeType saveType = Data()->GetCurrentType();
297  Data()->SetCurrentType(treeType);
298 
299  // if epochs are counted create monitoring histograms (only available for classification)
300  TString type = (treeType == Types::kTraining ? "train" : "test");
301  TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
302  TString nameB = name + "_B";
303  TString nameS = name + "_S";
304  Int_t nbin = 100;
305  Float_t limit = 2;
306  TH1* histS = 0;
307  TH1* histB = 0;
308  if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
309  histS = new TH1F( nameS, nameS, nbin, -limit, limit );
310  histB = new TH1F( nameB, nameB, nbin, -limit, limit );
311  }
312 
313  Double_t estimator = 0;
314 
315  // loop over all training events
316  Int_t nEvents = GetNEvents();
 317  UInt_t nClasses = DataInfo().GetNClasses();
318  UInt_t nTgts = DataInfo().GetNTargets();
319 
320 
321  Float_t sumOfWeights = 0.f;
322  if( fWeightRange < 1.f ){
323  fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
324  }
325 
326  for (Int_t i = 0; i < nEvents; i++) {
327 
328  const Event* ev = GetEvent(i);
329 
330  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
 331  && (saveType == Types::kTraining)){
332  continue;
333  }
334 
335  Double_t w = ev->GetWeight();
336 
337  ForceNetworkInputs( ev );
338  ForceNetworkCalculations();
 339 
340  Double_t d = 0, v = 0;
341  if (DoRegression()) {
342  for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
343  v = GetOutputNeuron( itgt )->GetActivationValue();
344  Double_t targetValue = ev->GetTarget( itgt );
345  Double_t dt = v - targetValue;
346  d += (dt*dt);
347  }
348  estimator += d*w;
349  } else if (DoMulticlass() ) {
350  UInt_t cls = ev->GetClass();
351  if (fEstimator==kCE){
352  Double_t norm(0);
353  for (UInt_t icls = 0; icls < nClasses; icls++) {
354  Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
355  norm += exp( activationValue );
356  if(icls==cls)
357  d = exp( activationValue );
358  }
359  d = -TMath::Log(d/norm);
360  }
361  else{
362  for (UInt_t icls = 0; icls < nClasses; icls++) {
363  Double_t desired = (icls==cls) ? 1.0 : 0.0;
364  v = GetOutputNeuron( icls )->GetActivationValue();
365  d = (desired-v)*(desired-v);
366  }
367  }
368  estimator += d*w; //zjh
369  } else {
370  Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
371  v = GetOutputNeuron()->GetActivationValue();
 372  if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
373  else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
374  estimator += d*w; //zjh
375  }
376 
377  if( fDeviationsFromTargets )
 378  fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
379 
380  sumOfWeights += w;
381 
382 
383  // fill monitoring histograms
384  if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
385  else if (histB != 0) histB->Fill( float(v), float(w) );
386  }
387 
388 
389  if( fDeviationsFromTargets ) {
390  std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
391 
392  Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
393  estimator = 0.f;
394 
395  Float_t weightRangeCut = fWeightRange*sumOfWeights;
396  Float_t weightSum = 0.f;
397  for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
398  float deviation = (*itDev).first;
399  float devWeight = (*itDev).second;
400  weightSum += devWeight; // add the weight of this event
401  if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
402  estimator += devWeight*deviation;
403  }
404  }
405 
406  sumOfWeights = sumOfWeightsInRange;
407  delete fDeviationsFromTargets;
408  }
409 
410  if (histS != 0) fEpochMonHistS.push_back( histS );
411  if (histB != 0) fEpochMonHistB.push_back( histB );
412 
413  //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
414  //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
415  //else estimator = estimator*0.5/Float_t(nEvents);
416  if (DoRegression()) estimator = estimator/Float_t(sumOfWeights);
417  else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
418  else estimator = estimator/Float_t(sumOfWeights);
419 
420 
421  //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
422 
423  Data()->SetCurrentType( saveType );
424 
425  // provide epoch-wise monitoring
426  if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
427  CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
428  }
429 
430  return estimator;
431 }
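// For reference, a sketch of the per-event contribution d computed above, with
// y the desired output, v the output-neuron activation and w the event weight:
//
//   MSE (two-class):            \f$ d = (y - v)^2 \f$
//   cross entropy (two-class):  \f$ d = -2\,[\, y \ln v + (1-y)\ln(1-v) \,] \f$
//   cross entropy (multiclass): \f$ d = -\ln\big( e^{a_{cls}} / \textstyle\sum_c e^{a_c} \big) \f$
//   regression:                 \f$ d = \sum_t (v_t - y_t)^2 \f$
//
// and the returned estimator is the weighted average \f$ E = \sum_i d_i w_i / \sum_i w_i \f$,
// restricted to the events with the smallest deviations when WeightRange < 1.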
432 
433 ////////////////////////////////////////////////////////////////////////////////
434 
435 void TMVA::MethodMLP::Train(Int_t nEpochs)
 436 {
437  if (fNetwork == 0) {
438  //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
439  Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
440  SetAnalysisType(GetAnalysisType());
 441  }
442  Log() << kDEBUG << "reinitialize learning rates" << Endl;
 443  InitializeLearningRates();
 444  Log() << kHEADER;
445  PrintMessage("Training Network");
446  Log() << Endl;
447  Int_t nEvents=GetNEvents();
 448  Int_t nSynapses=fSynapses->GetEntriesFast();
449  if (nSynapses>nEvents)
450  Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
451 
452  fIPyMaxIter = nEpochs;
453  if (fInteractive && fInteractive->NotInitialized()){
 454  std::vector<TString> titles = {"Error on training set", "Error on test set"};
455  fInteractive->Init(titles);
456  }
457 
458 #ifdef MethodMLP_UseMinuit__
459  if (useMinuit) MinuitMinimize();
460 #else
461  if (fTrainingMethod == kGA) GeneticMinimize();
 462  else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
463  else BackPropagationMinimize(nEpochs);
464 #endif
465 
466  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
467  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
468  if (fUseRegulator){
469  Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
470  UpdateRegulators();
 471  Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
472  }
473 
474  if( fCalculateErrors || fUseRegulator )
 475  {
 476  Int_t numSynapses=fSynapses->GetEntriesFast();
 477  fInvHessian.ResizeTo(numSynapses,numSynapses);
 478  GetApproxInvHessian( fInvHessian ,false);
 479  }
481 }
482 
483 ////////////////////////////////////////////////////////////////////////////////
484 /// train network with BFGS algorithm
485 
486 void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
 487 {
488  Timer timer( (fSteps>0?100:nEpochs), GetName() );
489 
490  // create histograms for overtraining monitoring
491  Int_t nbinTest = Int_t(nEpochs/fTestRate);
492  if(!IsSilentFile())
493  {
494  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
495  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
496  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
497  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
498  }
499 
500  Int_t nSynapses = fSynapses->GetEntriesFast();
501  Int_t nWeights = nSynapses;
502 
503  for (Int_t i=0;i<nSynapses;i++) {
504  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
505  synapse->SetDEDw(0.0);
506  }
507 
508  std::vector<Double_t> buffer( nWeights );
509  for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
510 
511  TMatrixD Dir ( nWeights, 1 );
512  TMatrixD Hessian ( nWeights, nWeights );
513  TMatrixD Gamma ( nWeights, 1 );
514  TMatrixD Delta ( nWeights, 1 );
515  Int_t RegUpdateCD=0; //zjh
516  Int_t RegUpdateTimes=0; //zjh
517  Double_t AccuError=0;
518 
519  Double_t trainE = -1;
520  Double_t testE = -1;
521 
522  fLastAlpha = 0.;
523 
524  if (fSamplingTraining || fSamplingTesting)
 525  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
526 
527  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
528  timer.DrawProgressBar( 0 );
529 
530  // start training cycles (epochs)
531  for (Int_t i = 0; i < nEpochs; i++) {
532 
533  if (fExitFromTraining) break;
534  fIPyCurrentIter = i;
535  if (Float_t(i)/nEpochs < fSamplingEpoch) {
536  if ((i+1)%fTestRate == 0 || (i == 0)) {
537  if (fSamplingTraining) {
 538  Data()->SetCurrentType( Types::kTraining );
 539  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
 540  Data()->CreateSampling();
 541  }
 542  if (fSamplingTesting) {
 543  Data()->SetCurrentType( Types::kTesting );
 544  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
 545  Data()->CreateSampling();
 546  }
 547  }
 548  }
 549  else {
 550  Data()->SetCurrentType( Types::kTraining );
 551  Data()->InitSampling(1.0,1.0);
 552  Data()->SetCurrentType( Types::kTesting );
 553  Data()->InitSampling(1.0,1.0);
 554  }
 555  Data()->SetCurrentType( Types::kTraining );
 556 
557  //zjh
558  if (fUseRegulator) {
559  UpdatePriors();
560  RegUpdateCD++;
561  }
562  //zjh
563 
564  SetGammaDelta( Gamma, Delta, buffer );
565 
566  if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
567  SteepestDir( Dir );
568  Hessian.UnitMatrix();
569  RegUpdateCD=0; //zjh
570  }
571  else {
572  if (GetHessian( Hessian, Gamma, Delta )) {
573  SteepestDir( Dir );
574  Hessian.UnitMatrix();
575  RegUpdateCD=0; //zjh
576  }
577  else SetDir( Hessian, Dir );
578  }
579 
580  Double_t dError=0; //zjh
581  if (DerivDir( Dir ) > 0) {
582  SteepestDir( Dir );
583  Hessian.UnitMatrix();
584  RegUpdateCD=0; //zjh
585  }
586  if (LineSearch( Dir, buffer, &dError )) { //zjh
587  Hessian.UnitMatrix();
588  SteepestDir( Dir );
589  RegUpdateCD=0; //zjh
590  if (LineSearch(Dir, buffer, &dError)) { //zjh
591  i = nEpochs;
592  Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
593  }
594  }
595 
596  //zjh+
597  if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
598  AccuError+=dError;
599 
600  if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
601  Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
602  UpdateRegulators();
 603  Hessian.UnitMatrix();
604  RegUpdateCD=0;
605  RegUpdateTimes++;
606  AccuError=0;
607  }
608  //zjh-
609 
610  // monitor convergence of training and control sample
611  if ((i+1)%fTestRate == 0) {
612  //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
613  //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
614  trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
615  testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
616  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
617  if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
618  {
619  fEstimatorHistTrain->Fill( i+1, trainE );
620  fEstimatorHistTest ->Fill( i+1, testE );
621  }
622  Bool_t success = kFALSE;
623  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
624  success = kTRUE;
625  }
626  Data()->EventResult( success );
627 
628  SetCurrentValue( testE );
629  if (HasConverged()) {
630  if (Float_t(i)/nEpochs < fSamplingEpoch) {
631  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
632  i = newEpoch;
633  ResetConvergenceCounter();
 634  }
635  else break;
636  }
637  }
638 
639  // draw progress
640  TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
641  if (fSteps > 0) {
642  Float_t progress = 0;
643  if (Float_t(i)/nEpochs < fSamplingEpoch)
644  // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
645  progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
646  else
647  {
648  // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
649  progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
650  }
651  Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
652  if (progress2>progress) progress=progress2; //zjh
653  timer.DrawProgressBar( Int_t(progress), convText );
654  }
655  else {
656  Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
657  if (progress<i) progress=i; //zjh
658  timer.DrawProgressBar( progress, convText ); //zjh
659  }
660 
661  // some verbose output
662  if (fgPRINT_SEQ) {
663  PrintNetwork();
664  WaitForKeyboard();
665  }
666  }
667 }
668 
669 ////////////////////////////////////////////////////////////////////////////////
670 
671 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
672 {
673  Int_t nWeights = fSynapses->GetEntriesFast();
674 
675  Int_t IDX = 0;
676  Int_t nSynapses = fSynapses->GetEntriesFast();
677  for (Int_t i=0;i<nSynapses;i++) {
678  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
679  Gamma[IDX++][0] = -synapse->GetDEDw();
680  }
681 
682  for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
683 
684  ComputeDEDw();
685 
686  IDX = 0;
687  for (Int_t i=0;i<nSynapses;i++)
688  {
689  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
690  Gamma[IDX++][0] += synapse->GetDEDw();
691  }
692 }
693 
694 ////////////////////////////////////////////////////////////////////////////////
695 
696 void TMVA::MethodMLP::ComputeDEDw()
 697 {
698  Int_t nSynapses = fSynapses->GetEntriesFast();
699  for (Int_t i=0;i<nSynapses;i++) {
700  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
701  synapse->SetDEDw( 0.0 );
702  }
703 
704  Int_t nEvents = GetNEvents();
 705  Int_t nPosEvents = nEvents;
706  for (Int_t i=0;i<nEvents;i++) {
707 
708  const Event* ev = GetEvent(i);
709  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
 710  && (Data()->GetCurrentType() == Types::kTraining)){
711  --nPosEvents;
712  continue;
713  }
714 
715  SimulateEvent( ev );
716 
717  for (Int_t j=0;j<nSynapses;j++) {
718  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
719  synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
720  }
721  }
722 
723  for (Int_t i=0;i<nSynapses;i++) {
724  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
725  Double_t DEDw=synapse->GetDEDw(); //zjh
726  if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
727  synapse->SetDEDw( DEDw / nPosEvents ); //zjh
728  }
729 }
730 
731 ////////////////////////////////////////////////////////////////////////////////
732 
733 void TMVA::MethodMLP::SimulateEvent( const Event* ev )
 734 {
735  Double_t eventWeight = ev->GetWeight();
736 
737  ForceNetworkInputs( ev );
738  ForceNetworkCalculations();
 739 
740  if (DoRegression()) {
741  UInt_t ntgt = DataInfo().GetNTargets();
742  for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
743  Double_t desired = ev->GetTarget(itgt);
744  Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
745  GetOutputNeuron( itgt )->SetError(error);
746  }
747  } else if (DoMulticlass()) {
748  UInt_t nClasses = DataInfo().GetNClasses();
749  UInt_t cls = ev->GetClass();
750  for (UInt_t icls = 0; icls < nClasses; icls++) {
751  Double_t desired = ( cls==icls ? 1.0 : 0.0 );
752  Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
753  GetOutputNeuron( icls )->SetError(error);
754  }
755  } else {
756  Double_t desired = GetDesiredOutput( ev );
757  Double_t error=-1; //zjh
758  if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
759  else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
760  GetOutputNeuron()->SetError(error);
761  }
762 
763  CalculateNeuronDeltas();
 764  for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
765  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
766  synapse->InitDelta();
767  synapse->CalculateDelta();
768  }
769 }
770 
771 ////////////////////////////////////////////////////////////////////////////////
772 
773 void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
 774 {
775  Int_t IDX = 0;
776  Int_t nSynapses = fSynapses->GetEntriesFast();
777 
778  for (Int_t i=0;i<nSynapses;i++) {
779  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
780  Dir[IDX++][0] = -synapse->GetDEDw();
781  }
782 }
783 
784 ////////////////////////////////////////////////////////////////////////////////
785 
786 Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
 787 {
788  TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
789  if ((Double_t) gd[0][0] == 0.) return kTRUE;
790  TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
791  TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
792  TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
793  Double_t a = 1 / (Double_t) gd[0][0];
794  Double_t f = 1 + ((Double_t)gHg[0][0]*a);
795  TMatrixD res(TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed, Delta)));
 796  res *= f;
 797  res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
 798  TMatrixD(TMatrixD::kTransposed, Delta)));
 799  res *= a;
800  Hessian += res;
801 
802  return kFALSE;
803 }
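// A sketch of the algebra implemented above: with step \f$\delta\f$ (Delta) and
// gradient change \f$\gamma\f$ (Gamma), the matrix H (named "Hessian" here, but
// used as the approximate *inverse* Hessian, dir = -H dE/dw in SetDir) receives
// the standard BFGS update
//
//   \f[ H \leftarrow H + \frac{1}{\gamma^{T}\delta}\Big[\Big(1+\frac{\gamma^{T}H\gamma}{\gamma^{T}\delta}\Big)\,\delta\delta^{T}
//        - \delta\gamma^{T}H - H\gamma\delta^{T}\Big] \f]
//
// the update is skipped (kTRUE returned, triggering a steepest-descent restart)
// when \f$\gamma^{T}\delta = 0\f$.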
804 
805 ////////////////////////////////////////////////////////////////////////////////
806 
807 void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
 808 {
809  Int_t IDX = 0;
810  Int_t nSynapses = fSynapses->GetEntriesFast();
811  TMatrixD DEDw(nSynapses, 1);
812 
813  for (Int_t i=0;i<nSynapses;i++) {
814  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
815  DEDw[IDX++][0] = synapse->GetDEDw();
816  }
817 
818  dir = Hessian * DEDw;
819  for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
820 }
821 
822 ////////////////////////////////////////////////////////////////////////////////
823 
824 Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
 825 {
826  Int_t IDX = 0;
827  Int_t nSynapses = fSynapses->GetEntriesFast();
828  Double_t Result = 0.0;
829 
830  for (Int_t i=0;i<nSynapses;i++) {
831  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
832  Result += Dir[IDX++][0] * synapse->GetDEDw();
833  }
834  return Result;
835 }
836 
837 ////////////////////////////////////////////////////////////////////////////////
838 
839 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
840 {
841  Int_t IDX = 0;
842  Int_t nSynapses = fSynapses->GetEntriesFast();
843  Int_t nWeights = nSynapses;
844 
845  std::vector<Double_t> Origin(nWeights);
846  for (Int_t i=0;i<nSynapses;i++) {
847  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
848  Origin[i] = synapse->GetWeight();
849  }
850 
851  Double_t err1 = GetError();
852  Double_t errOrigin=err1;//zjh
853  Double_t alpha1 = 0.;
854  Double_t alpha2 = fLastAlpha;
855 
856 
857  if (alpha2 < 0.01) alpha2 = 0.01;
858  else if (alpha2 > 2.0) alpha2 = 2.0;
859  Double_t alpha_original = alpha2;
860  Double_t alpha3 = alpha2;
861 
862  SetDirWeights( Origin, Dir, alpha2 );
863  Double_t err2 = GetError();
864  //Double_t err2 = err1;
865  Double_t err3 = err2;
866  Bool_t bingo = kFALSE;
867 
868 
869  if (err1 > err2) {
870  for (Int_t i=0;i<100;i++) {
871  alpha3 *= fTau;
872  SetDirWeights(Origin, Dir, alpha3);
873  err3 = GetError();
874  if (err3 > err2) {
875  bingo = kTRUE;
876  break;
877  }
878  alpha1 = alpha2;
879  err1 = err2;
880  alpha2 = alpha3;
881  err2 = err3;
882  }
883  if (!bingo) {
884  SetDirWeights(Origin, Dir, 0.);
885  return kTRUE;
886  }
887  }
888  else {
889  for (Int_t i=0;i<100;i++) {
890  alpha2 /= fTau;
891  if (i==50) {
892  Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
893  alpha2 = -alpha_original;
894  }
895  SetDirWeights(Origin, Dir, alpha2);
896  err2 = GetError();
897  if (err1 > err2) {
898  bingo = kTRUE;
899  break;
900  }
901  alpha3 = alpha2;
902  err3 = err2;
903  }
904  if (!bingo) {
905  SetDirWeights(Origin, Dir, 0.);
906  Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
907  fLastAlpha = 0.05;
908  return kTRUE;
909  }
910  }
911 
912  if (alpha1>0 && alpha2>0 && alpha3 > 0) {
913  fLastAlpha = 0.5 * (alpha1 + alpha3 -
914  (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
915  - ( err2 - err1 ) / (alpha2 - alpha1 )));
916  }
917  else {
918  fLastAlpha = alpha2;
919  }
920 
921  fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
922 
923  SetDirWeights(Origin, Dir, fLastAlpha);
924 
925  // leaving these lines uncommented is a heavy price to pay for only a warning message
926  // (which shouldn't appear anyway)
927  // --> about 15% of time is spent in the final GetError().
928  //
929  Double_t finalError = GetError();
930  if (finalError > err1) {
931  Log() << kWARNING << "Line search increased error! Something is wrong."
932  << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
933  << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
934  }
935 
936  for (Int_t i=0;i<nSynapses;i++) {
937  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
938  buffer[IDX] = synapse->GetWeight() - Origin[IDX];
939  IDX++;
940  }
941 
942  if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
943 
944  return kFALSE;
945 }
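// Note on the interpolation above: when alpha1, alpha2 and alpha3 are all positive,
// fLastAlpha is the location of the minimum of the parabola through the three
// sampled points (alpha1, err1), (alpha2, err2), (alpha3, err3); it is then capped
// at 10000 before the weights are moved along Dir.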
946 
947 ////////////////////////////////////////////////////////////////////////////////
948 
949 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
950 {
951  Int_t IDX = 0;
952  Int_t nSynapses = fSynapses->GetEntriesFast();
953 
954  for (Int_t i=0;i<nSynapses;i++) {
955  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
956  synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
957  IDX++;
958  }
959  if (fUseRegulator) UpdatePriors();//zjh
960 }
961 
962 
963 ////////////////////////////////////////////////////////////////////////////////
964 
965 Double_t TMVA::MethodMLP::GetError()
 966 {
 967  Int_t nEvents = GetNEvents();
 968  UInt_t ntgts = GetNTargets();
969  Double_t Result = 0.;
970 
971  for (Int_t i=0;i<nEvents;i++) {
972  const Event* ev = GetEvent(i);
973 
974  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
 975  && (Data()->GetCurrentType() == Types::kTraining)){
976  continue;
977  }
978  SimulateEvent( ev );
979 
980  Double_t error = 0.;
981  if (DoRegression()) {
982  for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
983  error += GetMSEErr( ev, itgt );//zjh
984  }
985  } else if ( DoMulticlass() ){
986  for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
987  error += GetMSEErr( ev, icls );
988  }
989  } else {
990  if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
991  else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
992  }
993  Result += error * ev->GetWeight();
994  }
995  if (fUseRegulator) Result+=fPrior; //zjh
996  if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
997  return Result;
998 }
999 
1000 ////////////////////////////////////////////////////////////////////////////////
1001 
1002 Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
 1003 {
 1004  Double_t error = 0;
 1005  Double_t output = GetOutputNeuron( index )->GetActivationValue();
 1006  Double_t target = 0;
1007  if (DoRegression()) target = ev->GetTarget( index );
1008  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1009  else target = GetDesiredOutput( ev );
1010 
1011  error = 0.5*(output-target)*(output-target); //zjh
1012 
1013  return error;
1014 
1015 }
1016 
1017 ////////////////////////////////////////////////////////////////////////////////
1018 
1019 Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
 1020 {
 1021  Double_t error = 0;
 1022  Double_t output = GetOutputNeuron( index )->GetActivationValue();
 1023  Double_t target = 0;
1024  if (DoRegression()) target = ev->GetTarget( index );
1025  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1026  else target = GetDesiredOutput( ev );
1027 
1028  error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1029 
1030  return error;
1031 }
1032 
1033 ////////////////////////////////////////////////////////////////////////////////
1034 /// minimize estimator / train network with backpropagation algorithm
1035 
1036 void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
 1037 {
1038  // Timer timer( nEpochs, GetName() );
1039  Timer timer( (fSteps>0?100:nEpochs), GetName() );
1040  Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1041 
1042  // create histograms for overtraining monitoring
1043  Int_t nbinTest = Int_t(nEpochs/fTestRate);
1044  if(!IsSilentFile())
1045  {
1046  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1047  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1048  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1049  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1050  }
1051  if (fSamplingTraining || fSamplingTesting)
 1052  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1053 
1054  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1055  timer.DrawProgressBar(0);
1056 
1057  // estimators
1058  Double_t trainE = -1;
1059  Double_t testE = -1;
1060 
1061  // start training cycles (epochs)
1062  for (Int_t i = 0; i < nEpochs; i++) {
1063 
1064  if (fExitFromTraining) break;
1065  fIPyCurrentIter = i;
1066  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1067  if ((i+1)%fTestRate == 0 || (i == 0)) {
1068  if (fSamplingTraining) {
 1069  Data()->SetCurrentType( Types::kTraining );
 1070  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
 1071  Data()->CreateSampling();
 1072  }
 1073  if (fSamplingTesting) {
 1074  Data()->SetCurrentType( Types::kTesting );
 1075  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
 1076  Data()->CreateSampling();
 1077  }
 1078  }
 1079  }
 1080  else {
 1081  Data()->SetCurrentType( Types::kTraining );
 1082  Data()->InitSampling(1.0,1.0);
 1083  Data()->SetCurrentType( Types::kTesting );
 1084  Data()->InitSampling(1.0,1.0);
 1085  }
 1086  Data()->SetCurrentType( Types::kTraining );
 1087 
1088  TrainOneEpoch();
1089  DecaySynapseWeights(i >= lateEpoch);
1090 
1091  // monitor convergence of training and control sample
1092  if ((i+1)%fTestRate == 0) {
1093  trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1094  testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1095  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1096  if(!IsSilentFile())
1097  {
1098  fEstimatorHistTrain->Fill( i+1, trainE );
1099  fEstimatorHistTest ->Fill( i+1, testE );
1100  }
1101  Bool_t success = kFALSE;
1102  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1103  success = kTRUE;
1104  }
1105  Data()->EventResult( success );
1106 
1107  SetCurrentValue( testE );
1108  if (HasConverged()) {
1109  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1110  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1111  i = newEpoch;
1112  ResetConvergenceCounter();
 1113  }
1114  else {
1115  if (lateEpoch > i) lateEpoch = i;
1116  else break;
1117  }
1118  }
1119  }
1120 
1121  // draw progress bar (add convergence value)
1122  TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1123  if (fSteps > 0) {
1124  Float_t progress = 0;
1125  if (Float_t(i)/nEpochs < fSamplingEpoch)
1126  progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1127  else
1128  progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1129 
1130  timer.DrawProgressBar( Int_t(progress), convText );
1131  }
1132  else {
1133  timer.DrawProgressBar( i, convText );
1134  }
1135  }
1136 }
1137 
1138 ////////////////////////////////////////////////////////////////////////////////
1139 /// train network over a single epoch/cycle of events
1140 
1141 void TMVA::MethodMLP::TrainOneEpoch()
 1142 {
1143  Int_t nEvents = Data()->GetNEvents();
1144 
1145  // randomize the order events will be presented, important for sequential mode
1146  Int_t* index = new Int_t[nEvents];
1147  for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1148  Shuffle(index, nEvents);
1149 
1150  // loop over all training events
1151  for (Int_t i = 0; i < nEvents; i++) {
1152 
1153  const Event * ev = GetEvent(index[i]);
1154  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1155  && (Data()->GetCurrentType() == Types::kTraining)){
1156  continue;
1157  }
1158 
1159  TrainOneEvent(index[i]);
1160 
1161  // do adjustments if in batch mode
1162  if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1163  AdjustSynapseWeights();
 1164  if (fgPRINT_BATCH) {
1165  PrintNetwork();
1166  WaitForKeyboard();
1167  }
1168  }
1169 
1170  // debug in sequential mode
1171  if (fgPRINT_SEQ) {
1172  PrintNetwork();
1173  WaitForKeyboard();
1174  }
1175  }
1176 
1177  delete[] index;
1178 }
1179 
1180 ////////////////////////////////////////////////////////////////////////////////
1181 /// Input:
1182 /// index: the array to shuffle
1183 /// n: the size of the array
1184 /// Output:
1185 /// index: the shuffled indexes
1186 /// This method is used for sequential training
1187 
1188 void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
 1189 {
1190  Int_t j, k;
1191  Int_t a = n - 1;
1192  for (Int_t i = 0; i < n; i++) {
1193  j = (Int_t) (frgen->Rndm() * a);
1194  if (j<n){ // address the 'worries' of coverity
1195  k = index[j];
1196  index[j] = index[i];
1197  index[i] = k;
1198  }
1199  }
1200 }
1201 
1202 ////////////////////////////////////////////////////////////////////////////////
1203 /// decay synapse weights
1204 /// in last 10 epochs, lower learning rate even more to find a good minimum
1205 
1206 void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
 1207 {
1208  TSynapse* synapse;
1209  Int_t numSynapses = fSynapses->GetEntriesFast();
1210  for (Int_t i = 0; i < numSynapses; i++) {
1211  synapse = (TSynapse*)fSynapses->At(i);
1212  if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1213  else synapse->DecayLearningRate(fDecayRate);
1214  }
1215 }
1216 
1217 ////////////////////////////////////////////////////////////////////////////////
1218 /// fast per-event training
1219 
1220 void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
 1221 {
1222  GetEvent(ievt);
1223 
1224  // as soon as we know how to get event weights, get that here
1225 
1226  // note: the normalization of event weights will affect the choice
1227  // of learning rate, one will have to experiment to get the right value.
1228  // in general, if the "average" event weight is 1, the learning rate
1229  // should be good if set around 0.02 (a good value if all event weights are 1)
1230  Double_t eventWeight = 1.0;
1231 
1232  // get the desired output of this event
1233  Double_t desired;
1234  if (type == 0) desired = fOutput->GetMin(); // background //zjh
1235  else desired = fOutput->GetMax(); // signal //zjh
1236 
1237  // force the value for each input neuron
1238  Double_t x;
1239  TNeuron* neuron;
1240 
1241  for (UInt_t j = 0; j < GetNvar(); j++) {
1242  x = branchVar[j];
1243  if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1244  neuron = GetInputNeuron(j);
1245  neuron->ForceValue(x);
1246  }
1247 
1248  ForceNetworkCalculations();
 1249  UpdateNetwork(desired, eventWeight);
1250 }
1251 
1252 ////////////////////////////////////////////////////////////////////////////////
1253 /// train network over a single event
1254 /// this uses the new event model
1255 
1256 void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
 1257 {
1258  // note: the normalization of event weights will affect the choice
1259  // of learning rate, one will have to experiment to get the right value.
1260  // in general, if the "average" event weight is 1, the learning rate
1261  // should be good if set around 0.02 (a good value if all event weights are 1)
1262 
1263  const Event * ev = GetEvent(ievt);
1264  Double_t eventWeight = ev->GetWeight();
1265  ForceNetworkInputs( ev );
1266  ForceNetworkCalculations();
 1267  if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
 1268  else if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight ); // 'else if' so regression events are not also passed to the classification update
1269  else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1270 }
1271 
1272 ////////////////////////////////////////////////////////////////////////////////
1273 /// get the desired output of this event
1274 
1275 Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
 1276 {
1277  return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1278 }
1279 
1280 
1281 ////////////////////////////////////////////////////////////////////////////////
1282 /// update the network based on how closely
1283 /// the output matched the desired output
1284 
1285 void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
 1286 {
1287  Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1288  if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1289  else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1290  else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1291  error *= eventWeight;
1292  GetOutputNeuron()->SetError(error);
1293  CalculateNeuronDeltas();
 1294  UpdateSynapses();
1295 }
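// Sanity check of the error signals used above (sketch): with v the output-neuron
// activation and y the desired output (0 or 1 for the cross-entropy case),
//   MSE:  E = 0.5 (v - y)^2               =>  dE/dv = v - y
//   CE:   E = -[ y ln v + (1-y) ln(1-v) ] =>  dE/dv = -y/v + (1-y)/(1-v) = -1/(v - 1 + y)
// so the values passed to SetError() are the derivatives of the per-event estimators
// in GetMSEErr()/GetCEErr(), scaled by the event weight.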
1296 
1297 ////////////////////////////////////////////////////////////////////////////////
1298 /// update the network based on how closely
1299 /// the output matched the desired output
1300 
1301 void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1302 {
1303  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1304  Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i);
1305  error *= eventWeight;
1306  GetOutputNeuron( i )->SetError(error);
1307  }
1308  CalculateNeuronDeltas();
 1309  UpdateSynapses();
1310 }
1311 
1312 
1313 ////////////////////////////////////////////////////////////////////////////////
1314 /// have each neuron calculate its delta by backpropagation
1315 
1317 {
1318  TNeuron* neuron;
1319  Int_t numNeurons;
1320  Int_t numLayers = fNetwork->GetEntriesFast();
1321  TObjArray* curLayer;
1322 
1323  // step backwards through the network (backpropagation)
1324  // deltas calculated starting at output layer
1325  for (Int_t i = numLayers-1; i >= 0; i--) {
1326  curLayer = (TObjArray*)fNetwork->At(i);
1327  numNeurons = curLayer->GetEntriesFast();
1328 
1329  for (Int_t j = 0; j < numNeurons; j++) {
1330  neuron = (TNeuron*) curLayer->At(j);
1331  neuron->CalculateDelta();
1332  }
1333  }
1334 }
1335 
1336 ////////////////////////////////////////////////////////////////////////////////
1337 /// create genetics class similar to GeneticCut
1338 /// give it vector of parameter ranges (parameters = weights)
1339 /// link fitness function of this class to ComputeEstimator
1340 /// instantiate GA (see MethodCuts)
1341 /// run it
1342 /// then this should exist for GA, Minuit and random sampling
1343 
1345 {
1346  PrintMessage("Minimizing Estimator with GA");
1347 
1348  // define GA parameters
1349  fGA_preCalc = 1;
1350  fGA_SC_steps = 10;
1351  fGA_SC_rate = 5;
1352  fGA_SC_factor = 0.95;
1353  fGA_nsteps = 30;
1354 
1355  // ranges
1356  std::vector<Interval*> ranges;
1357 
1358  Int_t numWeights = fSynapses->GetEntriesFast();
1359  for (Int_t ivar=0; ivar< numWeights; ivar++) {
1360  ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1361  }
1362 
1363  FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1364  gf->Run();
1365 
1366  Double_t estimator = CalculateEstimator();
1367  Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1368 }
1369 
1370 ////////////////////////////////////////////////////////////////////////////////
1371 /// interface to the estimate
1372 
1373 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1374 {
1375  return ComputeEstimator( parameters );
1376 }
1377 
1378 ////////////////////////////////////////////////////////////////////////////////
1379 /// this function is called by GeneticANN for GA optimization
1380 
1381 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1382 {
1383  TSynapse* synapse;
1384  Int_t numSynapses = fSynapses->GetEntriesFast();
1385 
1386  for (Int_t i = 0; i < numSynapses; i++) {
1387  synapse = (TSynapse*)fSynapses->At(i);
1388  synapse->SetWeight(parameters.at(i));
1389  }
1390  if (fUseRegulator) UpdatePriors(); //zjh
1391 
1392  Double_t estimator = CalculateEstimator();
1393 
1394  return estimator;
1395 }
1396 
1397 ////////////////////////////////////////////////////////////////////////////////
1398 /// update synapse error fields and adjust the weights (if in sequential mode)
1399 
1400 void TMVA::MethodMLP::UpdateSynapses()
 1401 {
1402  TNeuron* neuron;
1403  Int_t numNeurons;
1404  TObjArray* curLayer;
1405  Int_t numLayers = fNetwork->GetEntriesFast();
1406 
1407  for (Int_t i = 0; i < numLayers; i++) {
1408  curLayer = (TObjArray*)fNetwork->At(i);
1409  numNeurons = curLayer->GetEntriesFast();
1410 
1411  for (Int_t j = 0; j < numNeurons; j++) {
1412  neuron = (TNeuron*) curLayer->At(j);
1413  if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1414  else neuron->UpdateSynapsesSequential();
1415  }
1416  }
1417 }
1418 
1419 ////////////////////////////////////////////////////////////////////////////////
1420 /// just adjust the synapse weights (should be called in batch mode)
1421 
1422 void TMVA::MethodMLP::AdjustSynapseWeights()
 1423 {
1424  TNeuron* neuron;
1425  Int_t numNeurons;
1426  TObjArray* curLayer;
1427  Int_t numLayers = fNetwork->GetEntriesFast();
1428 
1429  for (Int_t i = numLayers-1; i >= 0; i--) {
1430  curLayer = (TObjArray*)fNetwork->At(i);
1431  numNeurons = curLayer->GetEntriesFast();
1432 
1433  for (Int_t j = 0; j < numNeurons; j++) {
1434  neuron = (TNeuron*) curLayer->At(j);
1435  neuron->AdjustSynapseWeights();
1436  }
1437  }
1438 }
1439 
1440 ////////////////////////////////////////////////////////////////////////////////
1441 
1442 void TMVA::MethodMLP::UpdatePriors()
 1443 {
1444  fPrior=0;
1445  fPriorDev.clear();
1446  Int_t nSynapses = fSynapses->GetEntriesFast();
1447  for (Int_t i=0;i<nSynapses;i++) {
1448  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1449  fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1450  fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1451  }
1452 }
1453 
1454 ////////////////////////////////////////////////////////////////////////////////
1455 
1456 void TMVA::MethodMLP::UpdateRegulators()
 1457 {
1458  TMatrixD InvH(0,0);
1459  GetApproxInvHessian(InvH);
1460  Int_t numSynapses=fSynapses->GetEntriesFast();
1461  Int_t numRegulators=fRegulators.size();
1462  Float_t gamma=0,
1463  variance=1.; // Gaussian noise
1464  std::vector<Int_t> nWDP(numRegulators);
1465  std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1466  for (int i=0;i<numSynapses;i++) {
1467  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1468  Int_t idx=fRegulatorIdx[i];
1469  nWDP[idx]++;
1470  trace[idx]+=InvH[i][i];
1471  gamma+=1-fRegulators[idx]*InvH[i][i];
1472  weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1473  }
1474  if (fEstimator==kMSE) {
1475  if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1476  else variance=CalculateEstimator( Types::kTraining, 0 );
1477  }
1478 
1479  //Log() << kDEBUG << Endl;
1480  for (int i=0;i<numRegulators;i++)
1481  {
1482  //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1483  fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1484  if (fRegulators[i]<0) fRegulators[i]=0;
1485  Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1486  }
1487  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1488  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1489 
1490  Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1491 
1492 }
1493 
1494 ////////////////////////////////////////////////////////////////////////////////
1495 
1496 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1497 {
1498  Int_t numSynapses=fSynapses->GetEntriesFast();
1499  InvHessian.ResizeTo( numSynapses, numSynapses );
1500  InvHessian=0;
1501  TMatrixD sens(numSynapses,1);
1502  TMatrixD sensT(1,numSynapses);
1503  Int_t nEvents = GetNEvents();
1504  for (Int_t i=0;i<nEvents;i++) {
1505  GetEvent(i);
1506  double outputValue=GetMvaValue(); // force calculation
1509  for (Int_t j = 0; j < numSynapses; j++){
1510  TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1511  synapses->InitDelta();
1512  synapses->CalculateDelta();
1513  sens[j][0]=sensT[0][j]=synapses->GetDelta();
1514  }
1515  if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1516  else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1517  }
1518 
1519  // TVectorD eValue(numSynapses);
1520  if (regulate) {
1521  for (Int_t i = 0; i < numSynapses; i++){
1522  InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1523  }
1524  }
1525  else {
1526  for (Int_t i = 0; i < numSynapses; i++){
1527  InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1528  }
1529  }
1530 
1531  InvHessian.Invert();
1532 
1533 }
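// What is assembled above is the outer-product (Gauss-Newton-like) approximation
// of the Hessian: for every training event the vector sens of synapse deltas (the
// sensitivity of the network output with respect to each weight) is accumulated as
// sens*sensT, weighted by output*(1-output) for the cross-entropy estimator; the
// regulators (or a small 1e-6 offset) are added to the diagonal to keep the matrix
// positive definite, and it is then inverted in place, so despite its name the
// argument InvHessian holds the inverse of this approximate Hessian on return.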
1534 
1535 ////////////////////////////////////////////////////////////////////////////////
1536 
1537 Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
 1538 {
1539  Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1540 
1541  // no hessian (old training file) or no error requested
1542  if (!fCalculateErrors || errLower==0 || errUpper==0)
1543  return MvaValue;
1544 
1545  Double_t MvaUpper,MvaLower,median,variance;
1546  Int_t numSynapses=fSynapses->GetEntriesFast();
1547  if (fInvHessian.GetNcols()!=numSynapses) {
1548  Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1549  }
1550  TMatrixD sens(numSynapses,1);
1551  TMatrixD sensT(1,numSynapses);
1553  //GetOutputNeuron()->SetError(1.);
1555  for (Int_t i = 0; i < numSynapses; i++){
1556  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1557  synapses->InitDelta();
1558  synapses->CalculateDelta();
1559  sensT[0][i]=synapses->GetDelta();
1560  }
1561  sens.Transpose(sensT);
1562  TMatrixD sig=sensT*fInvHessian*sens;
1563  variance=sig[0][0];
1564  median=GetOutputNeuron()->GetValue();
1565 
1566  if (variance<0) {
1567  Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1568  variance=0;
1569  }
1570  variance=sqrt(variance);
1571 
1572  //upper
1573  MvaUpper=fOutput->Eval(median+variance);
1574  if(errUpper)
1575  *errUpper=MvaUpper-MvaValue;
1576 
1577  //lower
1578  MvaLower=fOutput->Eval(median-variance);
1579  if(errLower)
1580  *errLower=MvaValue-MvaLower;
1581 
1582  return MvaValue;
1583 }
1584 
1585 
1586 #ifdef MethodMLP_UseMinuit__
1587 
1588 ////////////////////////////////////////////////////////////////////////////////
1589 /// minimize using Minuit
1590 
1591 void TMVA::MethodMLP::MinuitMinimize()
1592 {
1593  fNumberOfWeights = fSynapses->GetEntriesFast();
1594 
1595  TFitter* tfitter = new TFitter( fNumberOfWeights );
1596 
1597  // minuit-specific settings
1598  Double_t args[10];
1599 
1600  // output level
1601  args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1602  tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1603  tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1604 
1605  double w[54];
1606 
1607  // init parameters
1608  for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1609  TString parName = Form("w%i", ipar);
1610  tfitter->SetParameter( ipar,
1611  parName, w[ipar], 0.1, 0, 0 );
1612  }
1613 
1614  // define the FCN function
1615  tfitter->SetFCN( &IFCN );
1616 
1617  // define fit strategy
1618  args[0] = 2;
1619  tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1620 
1621  // now do the fit !
1622  args[0] = 1e-04;
1623  tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1624 
1625  Bool_t doBetter = kFALSE;
1626  Bool_t doEvenBetter = kFALSE;
1627  if (doBetter) {
1628  args[0] = 1e-04;
1629  tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1630 
1631  if (doEvenBetter) {
1632  args[0] = 500;
1633  tfitter->ExecuteCommand( "MINOS", args, 1 );
1634  }
1635  }
1636 }
1637 
1638 //_______________________________________________________________________
1639 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1640 {
1641  // Evaluate the minimisation function ----------------------------------------------------
1642  //
1643  // Input parameters:
1644  // npars: number of currently variable parameters
1645  // CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1646  // fitPars: array of (constant and variable) parameters
1647  // iflag: indicates what is to be calculated (see example below)
1648  // grad: array of gradients
1649  //
1650  // Output parameters:
1651  // f: the calculated function value.
1652  // grad: the (optional) vector of first derivatives).
1653  // ---------------------------------------------------------------------------------------
1654  ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1655 }
1656 
1657 TTHREAD_TLS(Int_t) nc = 0;
1658 TTHREAD_TLS(double) minf = 1000000;
1659 
1660 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1661 {
1662  // first update the weights
1663  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1664  TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1665  synapse->SetWeight(fitPars[ipar]);
1666  }
1667 
1668  // now compute the estimator
1669  f = CalculateEstimator();
1670 
1671  nc++;
1672  if (f < minf) minf = f;
1673  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1674  Log() << kDEBUG << Endl;
1675  Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1676 }
1677 
1678 ////////////////////////////////////////////////////////////////////////////////
1679 /// global "this" pointer to be used in minuit
1680 
1681 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1682 {
1683  return fgThis;
1684 }
1685 
1686 #endif
1687 
1688 
1689 ////////////////////////////////////////////////////////////////////////////////
1690 /// write specific classifier response
1691 
1692 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1693 {
1694  MethodANNBase::MakeClassSpecific(fout, className);
1695 }
1696 
1697 ////////////////////////////////////////////////////////////////////////////////
1698 /// get help message text
1699 ///
1700 /// typical length of text line:
1701 /// "|--------------------------------------------------------------|"
1702 
1703 void TMVA::MethodMLP::GetHelpMessage() const
 1704 {
1705  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1706  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1707 
1708  Log() << Endl;
1709  Log() << col << "--- Short description:" << colres << Endl;
1710  Log() << Endl;
1711  Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1712  Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1713  Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1714  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1715  Log() << "signal and one background). " << Endl;
1716  Log() << Endl;
1717  Log() << col << "--- Performance optimisation:" << colres << Endl;
1718  Log() << Endl;
1719  Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1720  Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1721  Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1722  Log() << "number of input variables that have only little discrimination power. " << Endl;
1723  Log() << "" << Endl;
1724  Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1725  Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1726  Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1727  Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1728  Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1729  Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1730  Log() << "competitive results)." << Endl;
1731  Log() << Endl;
1732  Log() << col << "Overtraining: " << colres
1733  << "only the TMlpANN performs an explicit separation of the" << Endl;
1734  Log() << "full training sample into independent training and validation samples." << Endl;
1735  Log() << "We have found that in most high-energy physics applications the " << Endl;
1736  Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1737  Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1738  Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1739  Log() << "the use of validation samples would only reduce the available training" << Endl;
1740  Log() << "information. However, if the performance on the training sample is " << Endl;
1741  Log() << "found to be significantly better than the one found with the inde-" << Endl;
1742  Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1743  Log() << "are printed to standard output at the end of each training job." << Endl;
1744  Log() << Endl;
1745  Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1746  Log() << Endl;
1747  Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1748  Log() << "\"HiddenLayers=N+1,N,...\", where the first hidden layer has N+1" << Endl;
1749  Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1750  Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1751  Log() << "in favour of more neurons in the first hidden layer." << Endl;
1752  Log() << "" << Endl;
1753  Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1754  Log() << "adjustable weights is small compared to the training sample size," << Endl;
1755  Log() << "using a large number of training cycles should not lead to overtraining." << Endl;
1756 }
1757 
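The hidden-layer architecture, number of training cycles and training method described in
the help text above are all passed through the option string when the method is booked.
The following is a minimal, hypothetical sketch of such a booking with the standard TMVA
Factory/DataLoader workflow of this ROOT version; the output file, dataset name and input
variables are placeholders and are not taken from this source file.

#include "TFile.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

void BookToyMLP()
{
   TFile* outputFile = TFile::Open( "TMVA_MLP.root", "RECREATE" );
   TMVA::Factory factory( "TMVAClassification", outputFile, "!V:AnalysisType=Classification" );
   TMVA::DataLoader loader( "dataset" );

   loader.AddVariable( "var1", 'F' );   // hypothetical input variables
   loader.AddVariable( "var2", 'F' );
   // ... AddSignalTree / AddBackgroundTree / PrepareTrainingAndTestTree go here ...

   // Two hidden layers with N+5 and N neurons (N = number of input variables),
   // 600 training cycles, BFGS minimization; TrainingMethod also accepts BP and GA.
   factory.BookMethod( &loader, TMVA::Types::kMLP, "MLP",
                       "NCycles=600:HiddenLayers=N+5,N:TrainingMethod=BFGS:TestRate=5" );
}

The NCycles and HiddenLayers keys correspond to options declared in MethodANNBase, while
TrainingMethod and TestRate are declared in DeclareOptions() earlier in this file.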