Logo ROOT   6.14/05
Reference Guide
MethodMLP.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 /*! \class TMVA::MethodMLP
39 \ingroup TMVA
40 
41 Multilayer Perceptron class built off of MethodANNBase
42 
43 */
44 
45 #include "TMVA/MethodMLP.h"
46 
47 #include "TMVA/Config.h"
48 #include "TMVA/Configurable.h"
49 #include "TMVA/ConvergenceTest.h"
50 #include "TMVA/ClassifierFactory.h"
51 #include "TMVA/DataSet.h"
52 #include "TMVA/DataSetInfo.h"
53 #include "TMVA/FitterBase.h"
54 #include "TMVA/GeneticFitter.h"
55 #include "TMVA/IFitterTarget.h"
56 #include "TMVA/IMethod.h"
57 #include "TMVA/Interval.h"
58 #include "TMVA/MethodANNBase.h"
59 #include "TMVA/MsgLogger.h"
60 #include "TMVA/TNeuron.h"
61 #include "TMVA/TSynapse.h"
62 #include "TMVA/Timer.h"
63 #include "TMVA/Tools.h"
64 #include "TMVA/Types.h"
65 
66 #include "TH1.h"
67 #include "TString.h"
68 #include "TTree.h"
69 #include "Riostream.h"
70 #include "TFitter.h"
71 #include "TMatrixD.h"
72 #include "TMath.h"
73 #include "TFile.h"
74 
75 #include <cmath>
76 #include <vector>
77 
78 #ifdef MethodMLP_UseMinuit__
79 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
80 Bool_t MethodMLP_UseMinuit = kTRUE;
81 #endif
82 
83 REGISTER_METHOD(MLP)
84 
86 
87  using std::vector;
88 
89 ////////////////////////////////////////////////////////////////////////////////
90 /// standard constructor
91 
93  const TString& methodTitle,
94  DataSetInfo& theData,
95  const TString& theOption)
96  : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
97  fUseRegulator(false), fCalculateErrors(false),
98  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
99  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
100  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
101  fSamplingTraining(false), fSamplingTesting(false),
102  fLastAlpha(0.0), fTau(0.),
103  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
104  fBPMode(kSequential), fBpModeS("None"),
105  fBatchSize(0), fTestRate(0), fEpochMon(false),
106  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
107  fGA_SC_rate(0), fGA_SC_factor(0.0),
108  fDeviationsFromTargets(0),
109  fWeightRange (1.0)
110 {
111 
112 }
113 
114 ////////////////////////////////////////////////////////////////////////////////
115 /// constructor from a weight file
116 
118  const TString& theWeightFile)
119  : MethodANNBase( Types::kMLP, theData, theWeightFile),
120  fUseRegulator(false), fCalculateErrors(false),
121  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
124  fSamplingTraining(false), fSamplingTesting(false),
125  fLastAlpha(0.0), fTau(0.),
126  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
127  fBPMode(kSequential), fBpModeS("None"),
128  fBatchSize(0), fTestRate(0), fEpochMon(false),
130  fGA_SC_rate(0), fGA_SC_factor(0.0),
132  fWeightRange (1.0)
133 {
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// destructor
138 /// nothing to be done
139 
141 {
142 }
143 
145 {
146  Train(NumCycles());
147 }
148 
149 
150 
151 ////////////////////////////////////////////////////////////////////////////////
152 /// MLP can handle classification with 2 classes and regression with one regression-target
153 
155 {
156  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
157  if (type == Types::kMulticlass ) return kTRUE;
158  if (type == Types::kRegression ) return kTRUE;
159 
160  return kFALSE;
161 }
162 
163 ////////////////////////////////////////////////////////////////////////////////
164 /// default initializations
165 
167 {
168  // the minimum requirement to declare an event signal-like
169  SetSignalReferenceCut( 0.5 );
170 #ifdef MethodMLP_UseMinuit__
171  fgThis = this;
172 #endif
173 }
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 /// define the options (their key words) that can be set in the option string
177 ///
178 /// know options:
179 ///
180 /// - TrainingMethod <string> Training method
181 /// available values are:
182 /// - BP Back-Propagation <default>
183 /// - GA Genetic Algorithm (takes a LONG time)
184 ///
185 /// - LearningRate <float> NN learning rate parameter
186 /// - DecayRate <float> Decay rate for learning parameter
187 /// - TestRate <int> Test for overtraining performed at each #th epochs
188 ///
189 /// - BPMode <string> Back-propagation learning mode
190 /// available values are:
191 /// - sequential <default>
192 /// - batch
193 ///
194 /// - BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
195 /// - -1 for BatchSize=number_of_events
196 
198 {
199  DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
200  "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
201  AddPreDefVal(TString("BP"));
202  AddPreDefVal(TString("GA"));
203  AddPreDefVal(TString("BFGS"));
204 
205  DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
206  DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
207  DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
208  DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
209 
210  DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
211  DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
212  DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance"," The sampling weights of events in epochs which successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
213 
214  DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
215  DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
216 
217  DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
218  DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
219 
220  DeclareOptionRef(fBpModeS="sequential", "BPMode",
221  "Back-propagation learning mode: sequential or batch");
222  AddPreDefVal(TString("sequential"));
223  AddPreDefVal(TString("batch"));
224 
225  DeclareOptionRef(fBatchSize=-1, "BatchSize",
226  "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
227 
228  DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
229  "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
230 
231  DeclareOptionRef(fSteps=-1, "ConvergenceTests",
232  "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
233 
234  DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
235  "Use regulator to avoid over-training"); //zjh
236  DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
237  "Maximum times of regulator update"); //zjh
238  DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
239  "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
240 
241  DeclareOptionRef(fWeightRange=1.0, "WeightRange",
242  "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
243 
244 }
245 
246 ////////////////////////////////////////////////////////////////////////////////
247 /// process user options
248 
250 {
252 
253 
255  Log() << kINFO
256  << "Will ignore negative events in training!"
257  << Endl;
258  }
259 
260 
261  if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
262  else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
263  else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
264 
265  if (fBpModeS == "sequential") fBPMode = kSequential;
266  else if (fBpModeS == "batch") fBPMode = kBatch;
267 
268  // InitializeLearningRates();
269 
270  if (fBPMode == kBatch) {
272  Int_t numEvents = Data()->GetNEvents();
273  if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
274  }
275 }
276 
277 ////////////////////////////////////////////////////////////////////////////////
278 /// initialize learning rates of synapses, used only by back propagation
279 
281 {
282  Log() << kDEBUG << "Initialize learning rates" << Endl;
283  TSynapse *synapse;
284  Int_t numSynapses = fSynapses->GetEntriesFast();
285  for (Int_t i = 0; i < numSynapses; i++) {
286  synapse = (TSynapse*)fSynapses->At(i);
287  synapse->SetLearningRate(fLearnRate);
288  }
289 }
290 
291 ////////////////////////////////////////////////////////////////////////////////
292 /// calculate the estimator that training is attempting to minimize
293 
295 {
296  // sanity check
297  if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
298  Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
299  }
300 
301  Types::ETreeType saveType = Data()->GetCurrentType();
302  Data()->SetCurrentType(treeType);
303 
304  // if epochs are counted create monitoring histograms (only available for classification)
305  TString type = (treeType == Types::kTraining ? "train" : "test");
306  TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
307  TString nameB = name + "_B";
308  TString nameS = name + "_S";
309  Int_t nbin = 100;
310  Float_t limit = 2;
311  TH1* histS = 0;
312  TH1* histB = 0;
313  if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
314  histS = new TH1F( nameS, nameS, nbin, -limit, limit );
315  histB = new TH1F( nameB, nameB, nbin, -limit, limit );
316  }
317 
318  Double_t estimator = 0;
319 
320  // loop over all training events
321  Int_t nEvents = GetNEvents();
322  UInt_t nClasses = DataInfo().GetNClasses();
323  UInt_t nTgts = DataInfo().GetNTargets();
324 
325 
326  Float_t sumOfWeights = 0.f;
327  if( fWeightRange < 1.f ){
328  fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
329  }
330 
331  for (Int_t i = 0; i < nEvents; i++) {
332 
333  const Event* ev = GetEvent(i);
334 
336  && (saveType == Types::kTraining)){
337  continue;
338  }
339 
340  Double_t w = ev->GetWeight();
341 
342  ForceNetworkInputs( ev );
344 
345  Double_t d = 0, v = 0;
346  if (DoRegression()) {
347  for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
348  v = GetOutputNeuron( itgt )->GetActivationValue();
349  Double_t targetValue = ev->GetTarget( itgt );
350  Double_t dt = v - targetValue;
351  d += (dt*dt);
352  }
353  estimator += d*w;
354  } else if (DoMulticlass() ) {
355  UInt_t cls = ev->GetClass();
356  if (fEstimator==kCE){
357  Double_t norm(0);
358  for (UInt_t icls = 0; icls < nClasses; icls++) {
359  Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
360  norm += exp( activationValue );
361  if(icls==cls)
362  d = exp( activationValue );
363  }
364  d = -TMath::Log(d/norm);
365  }
366  else{
367  for (UInt_t icls = 0; icls < nClasses; icls++) {
368  Double_t desired = (icls==cls) ? 1.0 : 0.0;
369  v = GetOutputNeuron( icls )->GetActivationValue();
370  d = (desired-v)*(desired-v);
371  }
372  }
373  estimator += d*w; //zjh
374  } else {
375  Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
377  if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
378  else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
379  estimator += d*w; //zjh
380  }
381 
383  fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
384 
385  sumOfWeights += w;
386 
387 
388  // fill monitoring histograms
389  if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
390  else if (histB != 0) histB->Fill( float(v), float(w) );
391  }
392 
393 
394  if( fDeviationsFromTargets ) {
395  std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
396 
397  Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
398  estimator = 0.f;
399 
400  Float_t weightRangeCut = fWeightRange*sumOfWeights;
401  Float_t weightSum = 0.f;
402  for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
403  float deviation = (*itDev).first;
404  float devWeight = (*itDev).second;
405  weightSum += devWeight; // add the weight of this event
406  if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
407  estimator += devWeight*deviation;
408  }
409  }
410 
411  sumOfWeights = sumOfWeightsInRange;
412  delete fDeviationsFromTargets;
413  }
414 
415  if (histS != 0) fEpochMonHistS.push_back( histS );
416  if (histB != 0) fEpochMonHistB.push_back( histB );
417 
418  //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
419  //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
420  //else estimator = estimator*0.5/Float_t(nEvents);
421  if (DoRegression()) estimator = estimator/Float_t(sumOfWeights);
422  else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
423  else estimator = estimator/Float_t(sumOfWeights);
424 
425 
426  //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
427 
428  Data()->SetCurrentType( saveType );
429 
430  // provide epoch-wise monitoring
431  if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
432  CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
433  }
434 
435  return estimator;
436 }
437 
438 ////////////////////////////////////////////////////////////////////////////////
439 
441 {
442  if (fNetwork == 0) {
443  //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
444  Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
446  }
447  Log() << kDEBUG << "reinitialize learning rates" << Endl;
449  Log() << kHEADER;
450  PrintMessage("Training Network");
451  Log() << Endl;
452  Int_t nEvents=GetNEvents();
453  Int_t nSynapses=fSynapses->GetEntriesFast();
454  if (nSynapses>nEvents)
455  Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
456 
457  fIPyMaxIter = nEpochs;
459  std::vector<TString> titles = {"Error on training set", "Error on test set"};
460  fInteractive->Init(titles);
461  }
462 
463 #ifdef MethodMLP_UseMinuit__
464  if (useMinuit) MinuitMinimize();
465 #else
467  else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
468  else BackPropagationMinimize(nEpochs);
469 #endif
470 
471  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
472  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
473  if (fUseRegulator){
474  Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
476  Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
477  }
478 
480  {
481  Int_t numSynapses=fSynapses->GetEntriesFast();
482  fInvHessian.ResizeTo(numSynapses,numSynapses);
484  }
486 }
487 
488 ////////////////////////////////////////////////////////////////////////////////
489 /// train network with BFGS algorithm
490 
492 {
493  Timer timer( (fSteps>0?100:nEpochs), GetName() );
494 
495  // create histograms for overtraining monitoring
496  Int_t nbinTest = Int_t(nEpochs/fTestRate);
497  if(!IsSilentFile())
498  {
499  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
500  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
501  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
502  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
503  }
504 
505  Int_t nSynapses = fSynapses->GetEntriesFast();
506  Int_t nWeights = nSynapses;
507 
508  for (Int_t i=0;i<nSynapses;i++) {
509  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
510  synapse->SetDEDw(0.0);
511  }
512 
513  std::vector<Double_t> buffer( nWeights );
514  for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
515 
516  TMatrixD Dir ( nWeights, 1 );
517  TMatrixD Hessian ( nWeights, nWeights );
518  TMatrixD Gamma ( nWeights, 1 );
519  TMatrixD Delta ( nWeights, 1 );
520  Int_t RegUpdateCD=0; //zjh
521  Int_t RegUpdateTimes=0; //zjh
522  Double_t AccuError=0;
523 
524  Double_t trainE = -1;
525  Double_t testE = -1;
526 
527  fLastAlpha = 0.;
528 
530  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
531 
532  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
533  timer.DrawProgressBar( 0 );
534 
535  // start training cycles (epochs)
536  for (Int_t i = 0; i < nEpochs; i++) {
537 
538  if (fExitFromTraining) break;
539  fIPyCurrentIter = i;
540  if (Float_t(i)/nEpochs < fSamplingEpoch) {
541  if ((i+1)%fTestRate == 0 || (i == 0)) {
542  if (fSamplingTraining) {
545  Data()->CreateSampling();
546  }
547  if (fSamplingTesting) {
550  Data()->CreateSampling();
551  }
552  }
553  }
554  else {
556  Data()->InitSampling(1.0,1.0);
558  Data()->InitSampling(1.0,1.0);
559  }
561 
562  //zjh
563  if (fUseRegulator) {
564  UpdatePriors();
565  RegUpdateCD++;
566  }
567  //zjh
568 
569  SetGammaDelta( Gamma, Delta, buffer );
570 
571  if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
572  SteepestDir( Dir );
573  Hessian.UnitMatrix();
574  RegUpdateCD=0; //zjh
575  }
576  else {
577  if (GetHessian( Hessian, Gamma, Delta )) {
578  SteepestDir( Dir );
579  Hessian.UnitMatrix();
580  RegUpdateCD=0; //zjh
581  }
582  else SetDir( Hessian, Dir );
583  }
584 
585  Double_t dError=0; //zjh
586  if (DerivDir( Dir ) > 0) {
587  SteepestDir( Dir );
588  Hessian.UnitMatrix();
589  RegUpdateCD=0; //zjh
590  }
591  if (LineSearch( Dir, buffer, &dError )) { //zjh
592  Hessian.UnitMatrix();
593  SteepestDir( Dir );
594  RegUpdateCD=0; //zjh
595  if (LineSearch(Dir, buffer, &dError)) { //zjh
596  i = nEpochs;
597  Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
598  }
599  }
600 
601  //zjh+
602  if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
603  AccuError+=dError;
604 
605  if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
606  Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
608  Hessian.UnitMatrix();
609  RegUpdateCD=0;
610  RegUpdateTimes++;
611  AccuError=0;
612  }
613  //zjh-
614 
615  // monitor convergence of training and control sample
616  if ((i+1)%fTestRate == 0) {
617  //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
618  //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
619  trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
620  testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
621  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
622  if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
623  {
624  fEstimatorHistTrain->Fill( i+1, trainE );
625  fEstimatorHistTest ->Fill( i+1, testE );
626  }
627  Bool_t success = kFALSE;
628  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
629  success = kTRUE;
630  }
631  Data()->EventResult( success );
632 
633  SetCurrentValue( testE );
634  if (HasConverged()) {
635  if (Float_t(i)/nEpochs < fSamplingEpoch) {
636  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
637  i = newEpoch;
639  }
640  else break;
641  }
642  }
643 
644  // draw progress
645  TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
646  if (fSteps > 0) {
647  Float_t progress = 0;
648  if (Float_t(i)/nEpochs < fSamplingEpoch)
649  // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
650  progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
651  else
652  {
653  // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
654  progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
655  }
656  Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
657  if (progress2>progress) progress=progress2; //zjh
658  timer.DrawProgressBar( Int_t(progress), convText );
659  }
660  else {
661  Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
662  if (progress<i) progress=i; //zjh
663  timer.DrawProgressBar( progress, convText ); //zjh
664  }
665 
666  // some verbose output
667  if (fgPRINT_SEQ) {
668  PrintNetwork();
669  WaitForKeyboard();
670  }
671  }
672 }
673 
674 ////////////////////////////////////////////////////////////////////////////////
675 
676 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
677 {
678  Int_t nWeights = fSynapses->GetEntriesFast();
679 
680  Int_t IDX = 0;
681  Int_t nSynapses = fSynapses->GetEntriesFast();
682  for (Int_t i=0;i<nSynapses;i++) {
683  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
684  Gamma[IDX++][0] = -synapse->GetDEDw();
685  }
686 
687  for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
688 
689  ComputeDEDw();
690 
691  IDX = 0;
692  for (Int_t i=0;i<nSynapses;i++)
693  {
694  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
695  Gamma[IDX++][0] += synapse->GetDEDw();
696  }
697 }
698 
699 ////////////////////////////////////////////////////////////////////////////////
700 
702 {
703  Int_t nSynapses = fSynapses->GetEntriesFast();
704  for (Int_t i=0;i<nSynapses;i++) {
705  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
706  synapse->SetDEDw( 0.0 );
707  }
708 
709  Int_t nEvents = GetNEvents();
710  Int_t nPosEvents = nEvents;
711  for (Int_t i=0;i<nEvents;i++) {
712 
713  const Event* ev = GetEvent(i);
715  && (Data()->GetCurrentType() == Types::kTraining)){
716  --nPosEvents;
717  continue;
718  }
719 
720  SimulateEvent( ev );
721 
722  for (Int_t j=0;j<nSynapses;j++) {
723  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
724  synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
725  }
726  }
727 
728  for (Int_t i=0;i<nSynapses;i++) {
729  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
730  Double_t DEDw=synapse->GetDEDw(); //zjh
731  if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
732  synapse->SetDEDw( DEDw / nPosEvents ); //zjh
733  }
734 }
735 
736 ////////////////////////////////////////////////////////////////////////////////
737 
739 {
740  Double_t eventWeight = ev->GetWeight();
741 
742  ForceNetworkInputs( ev );
744 
745  if (DoRegression()) {
746  UInt_t ntgt = DataInfo().GetNTargets();
747  for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
748  Double_t desired = ev->GetTarget(itgt);
749  Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
750  GetOutputNeuron( itgt )->SetError(error);
751  }
752  } else if (DoMulticlass()) {
753  UInt_t nClasses = DataInfo().GetNClasses();
754  UInt_t cls = ev->GetClass();
755  for (UInt_t icls = 0; icls < nClasses; icls++) {
756  Double_t desired = ( cls==icls ? 1.0 : 0.0 );
757  Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
758  GetOutputNeuron( icls )->SetError(error);
759  }
760  } else {
761  Double_t desired = GetDesiredOutput( ev );
762  Double_t error=-1; //zjh
763  if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
764  else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
765  GetOutputNeuron()->SetError(error);
766  }
767 
769  for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
770  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
771  synapse->InitDelta();
772  synapse->CalculateDelta();
773  }
774 }
775 
776 ////////////////////////////////////////////////////////////////////////////////
777 
779 {
780  Int_t IDX = 0;
781  Int_t nSynapses = fSynapses->GetEntriesFast();
782 
783  for (Int_t i=0;i<nSynapses;i++) {
784  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
785  Dir[IDX++][0] = -synapse->GetDEDw();
786  }
787 }
788 
789 ////////////////////////////////////////////////////////////////////////////////
790 
792 {
793  TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
794  if ((Double_t) gd[0][0] == 0.) return kTRUE;
795  TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
796  TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
797  TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
798  Double_t a = 1 / (Double_t) gd[0][0];
799  Double_t f = 1 + ((Double_t)gHg[0][0]*a);
801  res *= f;
802  res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
804  res *= a;
805  Hessian += res;
806 
807  return kFALSE;
808 }
809 
810 ////////////////////////////////////////////////////////////////////////////////
811 
813 {
814  Int_t IDX = 0;
815  Int_t nSynapses = fSynapses->GetEntriesFast();
816  TMatrixD DEDw(nSynapses, 1);
817 
818  for (Int_t i=0;i<nSynapses;i++) {
819  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
820  DEDw[IDX++][0] = synapse->GetDEDw();
821  }
822 
823  dir = Hessian * DEDw;
824  for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
825 }
826 
827 ////////////////////////////////////////////////////////////////////////////////
828 
830 {
831  Int_t IDX = 0;
832  Int_t nSynapses = fSynapses->GetEntriesFast();
833  Double_t Result = 0.0;
834 
835  for (Int_t i=0;i<nSynapses;i++) {
836  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
837  Result += Dir[IDX++][0] * synapse->GetDEDw();
838  }
839  return Result;
840 }
841 
842 ////////////////////////////////////////////////////////////////////////////////
843 
844 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
845 {
846  Int_t IDX = 0;
847  Int_t nSynapses = fSynapses->GetEntriesFast();
848  Int_t nWeights = nSynapses;
849 
850  std::vector<Double_t> Origin(nWeights);
851  for (Int_t i=0;i<nSynapses;i++) {
852  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
853  Origin[i] = synapse->GetWeight();
854  }
855 
856  Double_t err1 = GetError();
857  Double_t errOrigin=err1;//zjh
858  Double_t alpha1 = 0.;
859  Double_t alpha2 = fLastAlpha;
860 
861 
862  if (alpha2 < 0.01) alpha2 = 0.01;
863  else if (alpha2 > 2.0) alpha2 = 2.0;
864  Double_t alpha_original = alpha2;
865  Double_t alpha3 = alpha2;
866 
867  SetDirWeights( Origin, Dir, alpha2 );
868  Double_t err2 = GetError();
869  //Double_t err2 = err1;
870  Double_t err3 = err2;
871  Bool_t bingo = kFALSE;
872 
873 
874  if (err1 > err2) {
875  for (Int_t i=0;i<100;i++) {
876  alpha3 *= fTau;
877  SetDirWeights(Origin, Dir, alpha3);
878  err3 = GetError();
879  if (err3 > err2) {
880  bingo = kTRUE;
881  break;
882  }
883  alpha1 = alpha2;
884  err1 = err2;
885  alpha2 = alpha3;
886  err2 = err3;
887  }
888  if (!bingo) {
889  SetDirWeights(Origin, Dir, 0.);
890  return kTRUE;
891  }
892  }
893  else {
894  for (Int_t i=0;i<100;i++) {
895  alpha2 /= fTau;
896  if (i==50) {
897  Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
898  alpha2 = -alpha_original;
899  }
900  SetDirWeights(Origin, Dir, alpha2);
901  err2 = GetError();
902  if (err1 > err2) {
903  bingo = kTRUE;
904  break;
905  }
906  alpha3 = alpha2;
907  err3 = err2;
908  }
909  if (!bingo) {
910  SetDirWeights(Origin, Dir, 0.);
911  Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
912  fLastAlpha = 0.05;
913  return kTRUE;
914  }
915  }
916 
917  if (alpha1>0 && alpha2>0 && alpha3 > 0) {
918  fLastAlpha = 0.5 * (alpha1 + alpha3 -
919  (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
920  - ( err2 - err1 ) / (alpha2 - alpha1 )));
921  }
922  else {
923  fLastAlpha = alpha2;
924  }
925 
926  fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
927 
928  SetDirWeights(Origin, Dir, fLastAlpha);
929 
930  // leaving these lines uncommented is a heavy price to pay for only a warning message
931  // (which shouldn't appear anyway)
932  // --> about 15% of time is spent in the final GetError().
933  //
934  Double_t finalError = GetError();
935  if (finalError > err1) {
936  Log() << kWARNING << "Line search increased error! Something is wrong."
937  << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
938  << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
939  }
940 
941  for (Int_t i=0;i<nSynapses;i++) {
942  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
943  buffer[IDX] = synapse->GetWeight() - Origin[IDX];
944  IDX++;
945  }
946 
947  if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
948 
949  return kFALSE;
950 }
951 
952 ////////////////////////////////////////////////////////////////////////////////
953 
954 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
955 {
956  Int_t IDX = 0;
957  Int_t nSynapses = fSynapses->GetEntriesFast();
958 
959  for (Int_t i=0;i<nSynapses;i++) {
960  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
961  synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
962  IDX++;
963  }
964  if (fUseRegulator) UpdatePriors();//zjh
965 }
966 
967 
968 ////////////////////////////////////////////////////////////////////////////////
969 
971 {
972  Int_t nEvents = GetNEvents();
973  UInt_t ntgts = GetNTargets();
974  Double_t Result = 0.;
975 
976  for (Int_t i=0;i<nEvents;i++) {
977  const Event* ev = GetEvent(i);
978 
980  && (Data()->GetCurrentType() == Types::kTraining)){
981  continue;
982  }
983  SimulateEvent( ev );
984 
985  Double_t error = 0.;
986  if (DoRegression()) {
987  for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
988  error += GetMSEErr( ev, itgt );//zjh
989  }
990  } else if ( DoMulticlass() ){
991  for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
992  error += GetMSEErr( ev, icls );
993  }
994  } else {
995  if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
996  else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
997  }
998  Result += error * ev->GetWeight();
999  }
1000  if (fUseRegulator) Result+=fPrior; //zjh
1001  if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
1002  return Result;
1003 }
1004 
1005 ////////////////////////////////////////////////////////////////////////////////
1006 
1008 {
1009  Double_t error = 0;
1011  Double_t target = 0;
1012  if (DoRegression()) target = ev->GetTarget( index );
1013  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1014  else target = GetDesiredOutput( ev );
1015 
1016  error = 0.5*(output-target)*(output-target); //zjh
1017 
1018  return error;
1019 
1020 }
1021 
1022 ////////////////////////////////////////////////////////////////////////////////
1023 
1025 {
1026  Double_t error = 0;
1028  Double_t target = 0;
1029  if (DoRegression()) target = ev->GetTarget( index );
1030  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1031  else target = GetDesiredOutput( ev );
1032 
1033  error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1034 
1035  return error;
1036 }
1037 
1038 ////////////////////////////////////////////////////////////////////////////////
1039 /// minimize estimator / train network with back propagation algorithm
1040 
1042 {
1043  // Timer timer( nEpochs, GetName() );
1044  Timer timer( (fSteps>0?100:nEpochs), GetName() );
1045  Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1046 
1047  // create histograms for overtraining monitoring
1048  Int_t nbinTest = Int_t(nEpochs/fTestRate);
1049  if(!IsSilentFile())
1050  {
1051  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1052  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1053  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1054  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1055  }
1057  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1058 
1059  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1060  timer.DrawProgressBar(0);
1061 
1062  // estimators
1063  Double_t trainE = -1;
1064  Double_t testE = -1;
1065 
1066  // start training cycles (epochs)
1067  for (Int_t i = 0; i < nEpochs; i++) {
1068 
1069  if (fExitFromTraining) break;
1070  fIPyCurrentIter = i;
1071  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1072  if ((i+1)%fTestRate == 0 || (i == 0)) {
1073  if (fSamplingTraining) {
1076  Data()->CreateSampling();
1077  }
1078  if (fSamplingTesting) {
1081  Data()->CreateSampling();
1082  }
1083  }
1084  }
1085  else {
1087  Data()->InitSampling(1.0,1.0);
1089  Data()->InitSampling(1.0,1.0);
1090  }
1092 
1093  TrainOneEpoch();
1094  DecaySynapseWeights(i >= lateEpoch);
1095 
1096  // monitor convergence of training and control sample
1097  if ((i+1)%fTestRate == 0) {
1098  trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1099  testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1100  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1101  if(!IsSilentFile())
1102  {
1103  fEstimatorHistTrain->Fill( i+1, trainE );
1104  fEstimatorHistTest ->Fill( i+1, testE );
1105  }
1106  Bool_t success = kFALSE;
1107  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1108  success = kTRUE;
1109  }
1110  Data()->EventResult( success );
1111 
1112  SetCurrentValue( testE );
1113  if (HasConverged()) {
1114  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1115  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1116  i = newEpoch;
1118  }
1119  else {
1120  if (lateEpoch > i) lateEpoch = i;
1121  else break;
1122  }
1123  }
1124  }
1125 
1126  // draw progress bar (add convergence value)
1127  TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1128  if (fSteps > 0) {
1129  Float_t progress = 0;
1130  if (Float_t(i)/nEpochs < fSamplingEpoch)
1131  progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1132  else
1133  progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1134 
1135  timer.DrawProgressBar( Int_t(progress), convText );
1136  }
1137  else {
1138  timer.DrawProgressBar( i, convText );
1139  }
1140  }
1141 }
1142 
1143 ////////////////////////////////////////////////////////////////////////////////
1144 /// train network over a single epoch/cycle of events
1145 
1147 {
1148  Int_t nEvents = Data()->GetNEvents();
1149 
1150  // randomize the order events will be presented, important for sequential mode
1151  Int_t* index = new Int_t[nEvents];
1152  for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1153  Shuffle(index, nEvents);
1154 
1155  // loop over all training events
1156  for (Int_t i = 0; i < nEvents; i++) {
1157 
1158  const Event * ev = GetEvent(index[i]);
1159  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1160  && (Data()->GetCurrentType() == Types::kTraining)){
1161  continue;
1162  }
1163 
1164  TrainOneEvent(index[i]);
1165 
1166  // do adjustments if in batch mode
1167  if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1169  if (fgPRINT_BATCH) {
1170  PrintNetwork();
1171  WaitForKeyboard();
1172  }
1173  }
1174 
1175  // debug in sequential mode
1176  if (fgPRINT_SEQ) {
1177  PrintNetwork();
1178  WaitForKeyboard();
1179  }
1180  }
1181 
1182  delete[] index;
1183 }
1184 
1185 ////////////////////////////////////////////////////////////////////////////////
1186 /// Input:
1187 /// - index: the array to shuffle
1188 /// - n: the size of the array
1189 /// Output:
1190 /// - index: the shuffled indexes
1191 ///
1192 /// This method is used for sequential training
1193 
1195 {
1196  Int_t j, k;
1197  Int_t a = n - 1;
1198  for (Int_t i = 0; i < n; i++) {
1199  j = (Int_t) (frgen->Rndm() * a);
1200  if (j<n){ // address the 'worries' of coverity
1201  k = index[j];
1202  index[j] = index[i];
1203  index[i] = k;
1204  }
1205  }
1206 }
1207 
1208 ////////////////////////////////////////////////////////////////////////////////
1209 /// decay synapse weights
1210 /// in last 10 epochs, lower learning rate even more to find a good minimum
1211 
1213 {
1214  TSynapse* synapse;
1215  Int_t numSynapses = fSynapses->GetEntriesFast();
1216  for (Int_t i = 0; i < numSynapses; i++) {
1217  synapse = (TSynapse*)fSynapses->At(i);
1218  if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1219  else synapse->DecayLearningRate(fDecayRate);
1220  }
1221 }
1222 
1223 ////////////////////////////////////////////////////////////////////////////////
1224 /// fast per-event training
1225 
1227 {
1228  GetEvent(ievt);
1229 
1230  // as soon as we know how to get event weights, get that here
1231 
1232  // note: the normalization of event weights will affect the choice
1233  // of learning rate, one will have to experiment to get the right value.
1234  // in general, if the "average" event weight is 1, the learning rate
1235  // should be good if set around 0.02 (a good value if all event weights are 1)
1236  Double_t eventWeight = 1.0;
1237 
1238  // get the desired output of this event
1239  Double_t desired;
1240  if (type == 0) desired = fOutput->GetMin(); // background //zjh
1241  else desired = fOutput->GetMax(); // signal //zjh
1242 
1243  // force the value for each input neuron
1244  Double_t x;
1245  TNeuron* neuron;
1246 
1247  for (UInt_t j = 0; j < GetNvar(); j++) {
1248  x = branchVar[j];
1249  if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1250  neuron = GetInputNeuron(j);
1251  neuron->ForceValue(x);
1252  }
1253 
1255  UpdateNetwork(desired, eventWeight);
1256 }
1257 
1258 ////////////////////////////////////////////////////////////////////////////////
1259 /// train network over a single event
1260 /// this uses the new event model
1261 
1263 {
1264  // note: the normalization of event weights will affect the choice
1265  // of learning rate, one will have to experiment to get the right value.
1266  // in general, if the "average" event weight is 1, the learning rate
1267  // should be good if set around 0.02 (a good value if all event weights are 1)
1268 
1269  const Event * ev = GetEvent(ievt);
1270  Double_t eventWeight = ev->GetWeight();
1271  ForceNetworkInputs( ev );
1273  if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1274  if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1275  else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1276 }
1277 
1278 ////////////////////////////////////////////////////////////////////////////////
1279 /// get the desired output of this event
1280 
1282 {
1283  return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1284 }
1285 
1286 ////////////////////////////////////////////////////////////////////////////////
1287 /// update the network based on how closely
1288 /// the output matched the desired output
1289 
1291 {
1292  Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1293  if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1294  else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1295  else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1296  error *= eventWeight;
1297  GetOutputNeuron()->SetError(error);
1299  UpdateSynapses();
1300 }
1301 
1302 ////////////////////////////////////////////////////////////////////////////////
1303 /// update the network based on how closely
1304 /// the output matched the desired output
1305 
1306 void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1307 {
1308  // Norm for softmax
1309  Double_t norm = 0.;
1310  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1312  norm += TMath::Exp(act);
1313  }
1314 
1315  // Get output of network, and apply softmax
1316  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1318  Double_t output = TMath::Exp(act) / norm;
1319  Double_t error = output - desired.at(i);
1320  error *= eventWeight;
1321  GetOutputNeuron(i)->SetError(error);
1322  }
1323 
1324  // Do backpropagation
1326  UpdateSynapses();
1327 }
1328 
1329 ////////////////////////////////////////////////////////////////////////////////
1330 /// have each neuron calculate its delta by back propagation
1331 
1333 {
1334  TNeuron* neuron;
1335  Int_t numNeurons;
1336  Int_t numLayers = fNetwork->GetEntriesFast();
1337  TObjArray* curLayer;
1338 
1339  // step backwards through the network (back propagation)
1340  // deltas calculated starting at output layer
1341  for (Int_t i = numLayers-1; i >= 0; i--) {
1342  curLayer = (TObjArray*)fNetwork->At(i);
1343  numNeurons = curLayer->GetEntriesFast();
1344 
1345  for (Int_t j = 0; j < numNeurons; j++) {
1346  neuron = (TNeuron*) curLayer->At(j);
1347  neuron->CalculateDelta();
1348  }
1349  }
1350 }
1351 
1352 ////////////////////////////////////////////////////////////////////////////////
1353 /// create genetics class similar to GeneticCut
1354 /// give it vector of parameter ranges (parameters = weights)
1355 /// link fitness function of this class to ComputeEstimator
1356 /// instantiate GA (see MethodCuts)
1357 /// run it
1358 /// then this should exist for GA, Minuit and random sampling
1359 
1361 {
1362  PrintMessage("Minimizing Estimator with GA");
1363 
1364  // define GA parameters
1365  fGA_preCalc = 1;
1366  fGA_SC_steps = 10;
1367  fGA_SC_rate = 5;
1368  fGA_SC_factor = 0.95;
1369  fGA_nsteps = 30;
1370 
1371  // ranges
1372  std::vector<Interval*> ranges;
1373 
1374  Int_t numWeights = fSynapses->GetEntriesFast();
1375  for (Int_t ivar=0; ivar< numWeights; ivar++) {
1376  ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1377  }
1378 
1379  FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1380  gf->Run();
1381 
1382  Double_t estimator = CalculateEstimator();
1383  Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1384 }
1385 
1386 ////////////////////////////////////////////////////////////////////////////////
1387 /// interface to the estimate
1388 
1389 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1390 {
1391  return ComputeEstimator( parameters );
1392 }
1393 
1394 ////////////////////////////////////////////////////////////////////////////////
1395 /// this function is called by GeneticANN for GA optimization
1396 
1397 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1398 {
1399  TSynapse* synapse;
1400  Int_t numSynapses = fSynapses->GetEntriesFast();
1401 
1402  for (Int_t i = 0; i < numSynapses; i++) {
1403  synapse = (TSynapse*)fSynapses->At(i);
1404  synapse->SetWeight(parameters.at(i));
1405  }
1406  if (fUseRegulator) UpdatePriors(); //zjh
1407 
1408  Double_t estimator = CalculateEstimator();
1409 
1410  return estimator;
1411 }
1412 
1413 ////////////////////////////////////////////////////////////////////////////////
1414 /// update synapse error fields and adjust the weights (if in sequential mode)
1415 
1417 {
1418  TNeuron* neuron;
1419  Int_t numNeurons;
1420  TObjArray* curLayer;
1421  Int_t numLayers = fNetwork->GetEntriesFast();
1422 
1423  for (Int_t i = 0; i < numLayers; i++) {
1424  curLayer = (TObjArray*)fNetwork->At(i);
1425  numNeurons = curLayer->GetEntriesFast();
1426 
1427  for (Int_t j = 0; j < numNeurons; j++) {
1428  neuron = (TNeuron*) curLayer->At(j);
1429  if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1430  else neuron->UpdateSynapsesSequential();
1431  }
1432  }
1433 }
1434 
1435 ////////////////////////////////////////////////////////////////////////////////
1436 /// just adjust the synapse weights (should be called in batch mode)
1437 
1439 {
1440  TNeuron* neuron;
1441  Int_t numNeurons;
1442  TObjArray* curLayer;
1443  Int_t numLayers = fNetwork->GetEntriesFast();
1444 
1445  for (Int_t i = numLayers-1; i >= 0; i--) {
1446  curLayer = (TObjArray*)fNetwork->At(i);
1447  numNeurons = curLayer->GetEntriesFast();
1448 
1449  for (Int_t j = 0; j < numNeurons; j++) {
1450  neuron = (TNeuron*) curLayer->At(j);
1451  neuron->AdjustSynapseWeights();
1452  }
1453  }
1454 }
1455 
1456 ////////////////////////////////////////////////////////////////////////////////
1457 
1459 {
1460  fPrior=0;
1461  fPriorDev.clear();
1462  Int_t nSynapses = fSynapses->GetEntriesFast();
1463  for (Int_t i=0;i<nSynapses;i++) {
1464  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1465  fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1466  fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1467  }
1468 }
1469 
1470 ////////////////////////////////////////////////////////////////////////////////
1471 
1473 {
1474  TMatrixD InvH(0,0);
1475  GetApproxInvHessian(InvH);
1476  Int_t numSynapses=fSynapses->GetEntriesFast();
1477  Int_t numRegulators=fRegulators.size();
1478  Float_t gamma=0,
1479  variance=1.; // Gaussian noise
1480  std::vector<Int_t> nWDP(numRegulators);
1481  std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1482  for (int i=0;i<numSynapses;i++) {
1483  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1484  Int_t idx=fRegulatorIdx[i];
1485  nWDP[idx]++;
1486  trace[idx]+=InvH[i][i];
1487  gamma+=1-fRegulators[idx]*InvH[i][i];
1488  weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1489  }
1490  if (fEstimator==kMSE) {
1491  if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1492  else variance=CalculateEstimator( Types::kTraining, 0 );
1493  }
1494 
1495  //Log() << kDEBUG << Endl;
1496  for (int i=0;i<numRegulators;i++)
1497  {
1498  //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1499  fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1500  if (fRegulators[i]<0) fRegulators[i]=0;
1501  Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1502  }
1503  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1504  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1505 
1506  Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1507 
1508 }
1509 
1510 ////////////////////////////////////////////////////////////////////////////////
1511 
1512 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1513 {
1514  Int_t numSynapses=fSynapses->GetEntriesFast();
1515  InvHessian.ResizeTo( numSynapses, numSynapses );
1516  InvHessian=0;
1517  TMatrixD sens(numSynapses,1);
1518  TMatrixD sensT(1,numSynapses);
1519  Int_t nEvents = GetNEvents();
1520  for (Int_t i=0;i<nEvents;i++) {
1521  GetEvent(i);
1522  double outputValue=GetMvaValue(); // force calculation
1525  for (Int_t j = 0; j < numSynapses; j++){
1526  TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1527  synapses->InitDelta();
1528  synapses->CalculateDelta();
1529  sens[j][0]=sensT[0][j]=synapses->GetDelta();
1530  }
1531  if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1532  else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1533  }
1534 
1535  // TVectorD eValue(numSynapses);
1536  if (regulate) {
1537  for (Int_t i = 0; i < numSynapses; i++){
1538  InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1539  }
1540  }
1541  else {
1542  for (Int_t i = 0; i < numSynapses; i++){
1543  InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1544  }
1545  }
1546 
1547  InvHessian.Invert();
1548 
1549 }
1550 
1551 ////////////////////////////////////////////////////////////////////////////////
1552 
1554 {
1555  Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1556 
1557  // no hessian (old training file) or no error requested
1558  if (!fCalculateErrors || errLower==0 || errUpper==0)
1559  return MvaValue;
1560 
1561  Double_t MvaUpper,MvaLower,median,variance;
1562  Int_t numSynapses=fSynapses->GetEntriesFast();
1563  if (fInvHessian.GetNcols()!=numSynapses) {
1564  Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1565  }
1566  TMatrixD sens(numSynapses,1);
1567  TMatrixD sensT(1,numSynapses);
1569  //GetOutputNeuron()->SetError(1.);
1571  for (Int_t i = 0; i < numSynapses; i++){
1572  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1573  synapses->InitDelta();
1574  synapses->CalculateDelta();
1575  sensT[0][i]=synapses->GetDelta();
1576  }
1577  sens.Transpose(sensT);
1578  TMatrixD sig=sensT*fInvHessian*sens;
1579  variance=sig[0][0];
1580  median=GetOutputNeuron()->GetValue();
1581 
1582  if (variance<0) {
1583  Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1584  variance=0;
1585  }
1586  variance=sqrt(variance);
1587 
1588  //upper
1589  MvaUpper=fOutput->Eval(median+variance);
1590  if(errUpper)
1591  *errUpper=MvaUpper-MvaValue;
1592 
1593  //lower
1594  MvaLower=fOutput->Eval(median-variance);
1595  if(errLower)
1596  *errLower=MvaValue-MvaLower;
1597 
1598  return MvaValue;
1599 }
1600 
1601 
1602 #ifdef MethodMLP_UseMinuit__
1603 
1604 ////////////////////////////////////////////////////////////////////////////////
1605 /// minimize using Minuit
1606 
1607 void TMVA::MethodMLP::MinuitMinimize()
1608 {
1609  fNumberOfWeights = fSynapses->GetEntriesFast();
1610 
1611  TFitter* tfitter = new TFitter( fNumberOfWeights );
1612 
1613  // minuit-specific settings
1614  Double_t args[10];
1615 
1616  // output level
1617  args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1618  tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1619  tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1620 
1621  double w[54];
1622 
1623  // init parameters
1624  for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1625  TString parName = Form("w%i", ipar);
1626  tfitter->SetParameter( ipar,
1627  parName, w[ipar], 0.1, 0, 0 );
1628  }
1629 
1630  // define the CFN function
1631  tfitter->SetFCN( &IFCN );
1632 
1633  // define fit strategy
1634  args[0] = 2;
1635  tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1636 
1637  // now do the fit !
1638  args[0] = 1e-04;
1639  tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1640 
1641  Bool_t doBetter = kFALSE;
1642  Bool_t doEvenBetter = kFALSE;
1643  if (doBetter) {
1644  args[0] = 1e-04;
1645  tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1646 
1647  if (doEvenBetter) {
1648  args[0] = 500;
1649  tfitter->ExecuteCommand( "MINOS", args, 1 );
1650  }
1651  }
1652 }
1653 
1654 ////////////////////////////////////////////////////////////////////////////////
1655 /// Evaluate the minimisation function
1656 ///
1657 /// Input parameters:
1658 /// - npars: number of currently variable parameters
1659 /// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1660 /// - fitPars: array of (constant and variable) parameters
1661 /// - iflag: indicates what is to be calculated (see example below)
1662 /// - grad: array of gradients
1663 ///
1664 /// Output parameters:
1665 /// - f: the calculated function value.
1666 /// - grad: the (optional) vector of first derivatives).
1667 
1668 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1669 {
1670  ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1671 }
1672 
1673 TTHREAD_TLS(Int_t) nc = 0;
1674 TTHREAD_TLS(double) minf = 1000000;
1675 
1676 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1677 {
1678  // first update the weights
1679  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1680  TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1681  synapse->SetWeight(fitPars[ipar]);
1682  }
1683 
1684  // now compute the estimator
1685  f = CalculateEstimator();
1686 
1687  nc++;
1688  if (f < minf) minf = f;
1689  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1690  Log() << kDEBUG << Endl;
1691  Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1692 }
1693 
1694 ////////////////////////////////////////////////////////////////////////////////
1695 /// global "this" pointer to be used in minuit
1696 
1697 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1698 {
1699  return fgThis;
1700 }
1701 
1702 #endif
1703 
1704 
1705 ////////////////////////////////////////////////////////////////////////////////
1706 /// write specific classifier response
1707 
1708 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1709 {
1710  MethodANNBase::MakeClassSpecific(fout, className);
1711 }
1712 
1713 ////////////////////////////////////////////////////////////////////////////////
1714 /// get help message text
1715 ///
1716 /// typical length of text line:
1717 /// "|--------------------------------------------------------------|"
1718 
1720 {
1721  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1722  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1723 
1724  Log() << Endl;
1725  Log() << col << "--- Short description:" << colres << Endl;
1726  Log() << Endl;
1727  Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1728  Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1729  Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1730  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1731  Log() << "signal and one background). " << Endl;
1732  Log() << Endl;
1733  Log() << col << "--- Performance optimisation:" << colres << Endl;
1734  Log() << Endl;
1735  Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1736  Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1737  Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1738  Log() << "number of input variables that have only little discrimination power. " << Endl;
1739  Log() << "" << Endl;
1740  Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1741  Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1742  Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1743  Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1744  Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1745  Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1746  Log() << "competitive results)." << Endl;
1747  Log() << Endl;
1748  Log() << col << "Overtraining: " << colres
1749  << "only the TMlpANN performs an explicit separation of the" << Endl;
1750  Log() << "full training sample into independent training and validation samples." << Endl;
1751  Log() << "We have found that in most high-energy physics applications the " << Endl;
1752  Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1753  Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1754  Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1755  Log() << "the use of validation samples would only reduce the available training" << Endl;
1756  Log() << "information. However, if the performance on the training sample is " << Endl;
1757  Log() << "found to be significantly better than the one found with the inde-" << Endl;
1758  Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1759  Log() << "are printed to standard output at the end of each training job." << Endl;
1760  Log() << Endl;
1761  Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1762  Log() << Endl;
1763  Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1764  Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
1765  Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1766  Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1767  Log() << "in favour of more neurons in the first hidden layer." << Endl;
1768  Log() << "" << Endl;
1769  Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1770  Log() << "adjustable weights is small compared to the training sample size," << Endl;
1771  Log() << "using a large number of training cycles should not lead to overtraining." << Endl;
1772 }
1773 
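// Illustrative usage sketch (hypothetical, not part of the original source): booking this
// classifier from user code with the options discussed in the help text above. "factory"
// and "dataloader" are assumed to be an existing TMVA::Factory and TMVA::DataLoader; the
// option string is an example configuration, not a recommendation.
//
//   // First hidden layer with N+1 neurons, second with N (N = number of input
//   // variables); NCycles kept well above 500 as advised above.
//   factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP",
//                        "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:"
//                        "HiddenLayers=N+1,N:TestRate=5" );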