MethodMLP.cxx
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 //_______________________________________________________________________
39 //
40 // Multilayer Perceptron class built off of MethodANNBase
41 //_______________________________________________________________________
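//
// [Illustrative sketch, not part of the original source] MethodMLP is normally not
// constructed directly but booked through the TMVA::Factory, which then invokes
// DeclareOptions(), ProcessOptions() and Train(). A typical booking call (option
// names come from this class and its MethodANNBase/MethodBase base classes) is:
//
//    factory->BookMethod( TMVA::Types::kMLP, "MLP",
//                         "H:!V:NeuronType=tanh:NCycles=600:HiddenLayers=N+5:"
//                         "TestRate=5:TrainingMethod=BFGS:!UseRegulator" );
//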
42 
43 #include "TString.h"
44 #include <vector>
45 #include <cmath>
46 #include "TTree.h"
47 #include "Riostream.h"
48 #include "TFitter.h"
49 #include "TMatrixD.h"
50 #include "TMath.h"
51 #include "TFile.h"
52 
53 #include "TMVA/ClassifierFactory.h"
54 #include "TMVA/Interval.h"
55 #include "TMVA/MethodMLP.h"
56 #include "TMVA/TNeuron.h"
57 #include "TMVA/TSynapse.h"
58 #include "TMVA/Timer.h"
59 #include "TMVA/Types.h"
60 #include "TMVA/Tools.h"
61 #include "TMVA/GeneticFitter.h"
62 #include "TMVA/Config.h"
63 
64 #ifdef MethodMLP_UseMinuit__
65 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
66 Bool_t MethodMLP_UseMinuit = kTRUE;
67 #endif
68 
69 REGISTER_METHOD(MLP)
70 
71 ClassImp(TMVA::MethodMLP)
72 
73 using std::vector;
74 
75 ////////////////////////////////////////////////////////////////////////////////
76 /// standard constructor
77 
78 TMVA::MethodMLP::MethodMLP( const TString& jobName,
79  const TString& methodTitle,
80  DataSetInfo& theData,
81  const TString& theOption,
82  TDirectory* theTargetDir )
83  : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption, theTargetDir ),
84  fUseRegulator(false), fCalculateErrors(false),
85  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
86  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
87  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
88  fSamplingTraining(false), fSamplingTesting(false),
89  fLastAlpha(0.0), fTau(0.),
90  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
91  fBPMode(kSequential), fBpModeS("None"),
92  fBatchSize(0), fTestRate(0), fEpochMon(false),
93  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
94  fGA_SC_rate(0), fGA_SC_factor(0.0),
95  fDeviationsFromTargets(0),
96  fWeightRange (1.0)
97 {
98 }
99 
100 ////////////////////////////////////////////////////////////////////////////////
101 /// constructor from a weight file
102 
103 TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
104                             const TString& theWeightFile,
105  TDirectory* theTargetDir )
106  : MethodANNBase( Types::kMLP, theData, theWeightFile, theTargetDir ),
107  fUseRegulator(false), fCalculateErrors(false),
108  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
109  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
110  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
111  fSamplingTraining(false), fSamplingTesting(false),
112  fLastAlpha(0.0), fTau(0.),
113  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
114  fBPMode(kSequential), fBpModeS("None"),
115  fBatchSize(0), fTestRate(0), fEpochMon(false),
116  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
117  fGA_SC_rate(0), fGA_SC_factor(0.0),
118  fDeviationsFromTargets(0),
119  fWeightRange (1.0)
120 {
121 }
122 
123 ////////////////////////////////////////////////////////////////////////////////
124 /// destructor
125 /// nothing to be done
126 
127 TMVA::MethodMLP::~MethodMLP()
128 {
129 }
130 
131 ////////////////////////////////////////////////////////////////////////////////
132 /// MLP can handle classification with 2 classes and regression with one regression-target
133 
134 Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
135 {
136  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
137  if (type == Types::kMulticlass ) return kTRUE;
138  if (type == Types::kRegression ) return kTRUE;
139 
140  return kFALSE;
141 }
142 
143 ////////////////////////////////////////////////////////////////////////////////
144 /// default initializations
145 
146 void TMVA::MethodMLP::Init()
147 {
148  // the minimum requirement to declare an event signal-like
149  SetSignalReferenceCut( 0.5 );
150 #ifdef MethodMLP_UseMinuit__
151  fgThis = this;
152 #endif
153 }
154 
155 ////////////////////////////////////////////////////////////////////////////////
156 /// define the options (their key words) that can be set in the option string
157 /// known options (an example option string is sketched after this function):
158 /// TrainingMethod <string> Training method
159 /// available values are: BP Back-Propagation <default>
160 /// GA Genetic Algorithm (takes a LONG time)
161 ///
162 /// LearningRate <float> NN learning rate parameter
163 /// DecayRate <float> Decay rate for learning parameter
164 /// TestRate <int> Test for overtraining performed at each #th epoch
165 ///
166 /// BPMode <string> Back-propagation learning mode
167 /// available values are: sequential <default>
168 /// batch
169 ///
170 /// BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
171 /// -1 for BatchSize=number_of_events
172 
173 void TMVA::MethodMLP::DeclareOptions()
174 {
175  DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
176  "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
177  AddPreDefVal(TString("BP"));
178  AddPreDefVal(TString("GA"));
179  AddPreDefVal(TString("BFGS"));
180 
181  DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
182  DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
183  DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
184  DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
185 
186  DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
187  DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
188  DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance","The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
189 
190  DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
191  DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
192 
193  DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
194  DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
195 
196  DeclareOptionRef(fBpModeS="sequential", "BPMode",
197  "Back-propagation learning mode: sequential or batch");
198  AddPreDefVal(TString("sequential"));
199  AddPreDefVal(TString("batch"));
200 
201  DeclareOptionRef(fBatchSize=-1, "BatchSize",
202  "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
203 
204  DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
205  "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
206 
207  DeclareOptionRef(fSteps=-1, "ConvergenceTests",
208  "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
209 
210  DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
211  "Use regulator to avoid over-training"); //zjh
212  DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
213  "Maximum times of regulator update"); //zjh
214  DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
215  "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
216 
217  DeclareOptionRef(fWeightRange=1.0, "WeightRange",
218  "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
219 
220 }
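//
// [Illustrative sketch, not part of the original source] An example option string
// exercising the options declared above, in the form in which it would be passed
// through the Factory booking call:
//
//    "TrainingMethod=BP:LearningRate=0.02:DecayRate=0.01:TestRate=10:"
//    "BPMode=sequential:BatchSize=-1:ConvergenceTests=-1:!UseRegulator"
//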
221 
222 ////////////////////////////////////////////////////////////////////////////////
223 /// process user options
224 
225 void TMVA::MethodMLP::ProcessOptions()
226 {
227    MethodANNBase::ProcessOptions();
228 
229 
230  if (IgnoreEventsWithNegWeightsInTraining()) {
231  Log() << kINFO
232  << "Will ignore negative events in training!"
233  << Endl;
234  }
235 
236 
237  if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
238  else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
239  else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
240 
241  if (fBpModeS == "sequential") fBPMode = kSequential;
242  else if (fBpModeS == "batch") fBPMode = kBatch;
243 
244  // InitializeLearningRates();
245 
246  if (fBPMode == kBatch) {
247  Data()->SetCurrentType(Types::kTraining);
248  Int_t numEvents = Data()->GetNEvents();
249  if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
250  }
251 }
252 
253 ////////////////////////////////////////////////////////////////////////////////
254 /// initialize learning rates of synapses, used only by backpropagation
255 
256 void TMVA::MethodMLP::InitializeLearningRates()
257 {
258  Log() << kDEBUG << "Initialize learning rates" << Endl;
259  TSynapse *synapse;
260  Int_t numSynapses = fSynapses->GetEntriesFast();
261  for (Int_t i = 0; i < numSynapses; i++) {
262  synapse = (TSynapse*)fSynapses->At(i);
263  synapse->SetLearningRate(fLearnRate);
264  }
265 }
266 
267 ////////////////////////////////////////////////////////////////////////////////
268 /// calculate the estimator that training is attempting to minimize
269 
270 Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
271 {
272  // sanity check
273  if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
274  Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
275  }
276 
277  Types::ETreeType saveType = Data()->GetCurrentType();
278  Data()->SetCurrentType(treeType);
279 
280  // if epochs are counted create monitoring histograms (only available for classification)
281  TString type = (treeType == Types::kTraining ? "train" : "test");
282  TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
283  TString nameB = name + "_B";
284  TString nameS = name + "_S";
285  Int_t nbin = 100;
286  Float_t limit = 2;
287  TH1* histS = 0;
288  TH1* histB = 0;
289  if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
290  histS = new TH1F( nameS, nameS, nbin, -limit, limit );
291  histB = new TH1F( nameB, nameB, nbin, -limit, limit );
292  }
293 
294  Double_t estimator = 0;
295 
296  // loop over all training events
297  Int_t nEvents = GetNEvents();
298  UInt_t nClasses = DataInfo().GetNClasses();
299  UInt_t nTgts = DataInfo().GetNTargets();
300 
301 
302  Float_t sumOfWeights = 0.f;
303  if( fWeightRange < 1.f ){
304  fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
305  }
306 
307  for (Int_t i = 0; i < nEvents; i++) {
308 
309  const Event* ev = GetEvent(i);
310 
311  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
312  && (saveType == Types::kTraining)){
313  continue;
314  }
315 
316  Double_t w = ev->GetWeight();
317 
318  ForceNetworkInputs( ev );
319  ForceNetworkCalculations();
320 
321  Double_t d = 0, v = 0;
322  if (DoRegression()) {
323  for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
324  v = GetOutputNeuron( itgt )->GetActivationValue();
325  Double_t targetValue = ev->GetTarget( itgt );
326  Double_t dt = v - targetValue;
327  d += (dt*dt);
328  }
329  estimator += d*w;
330  } else if (DoMulticlass() ) {
331  UInt_t cls = ev->GetClass();
332  if (fEstimator==kCE){
333  Double_t norm(0);
334  for (UInt_t icls = 0; icls < nClasses; icls++) {
335  Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
336  norm += exp( activationValue );
337  if(icls==cls)
338  d = exp( activationValue );
339  }
340  d = -TMath::Log(d/norm);
341  }
342  else{
343  for (UInt_t icls = 0; icls < nClasses; icls++) {
344  Double_t desired = (icls==cls) ? 1.0 : 0.0;
345  v = GetOutputNeuron( icls )->GetActivationValue();
346  d = (desired-v)*(desired-v);
347  }
348  }
349  estimator += d*w; //zjh
350  } else {
351  Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
352  v = GetOutputNeuron()->GetActivationValue();
353  if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
354  else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
355  estimator += d*w; //zjh
356  }
357 
358  if( fDeviationsFromTargets )
359  fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
360 
361  sumOfWeights += w;
362 
363 
364  // fill monitoring histograms
365  if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
366  else if (histB != 0) histB->Fill( float(v), float(w) );
367  }
368 
369 
370  if( fDeviationsFromTargets ) {
371  std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
372 
373  Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
374  estimator = 0.f;
375 
376  Float_t weightRangeCut = fWeightRange*sumOfWeights;
377  Float_t weightSum = 0.f;
378  for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
379  float deviation = (*itDev).first;
380  float devWeight = (*itDev).second;
381  weightSum += devWeight; // add the weight of this event
382  if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
383  estimator += devWeight*deviation;
384  }
385  }
386 
387  sumOfWeights = sumOfWeightsInRange;
388  delete fDeviationsFromTargets;
389  }
390 
391  if (histS != 0) fEpochMonHistS.push_back( histS );
392  if (histB != 0) fEpochMonHistB.push_back( histB );
393 
394  //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
395  //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
396  //else estimator = estimator*0.5/Float_t(nEvents);
397  if (DoRegression()) estimator = estimator/Float_t(sumOfWeights);
398  else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
399  else estimator = estimator/Float_t(sumOfWeights);
400 
401 
402  //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
403 
404  Data()->SetCurrentType( saveType );
405 
406  // provide epoch-wise monitoring
407  if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
408  CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
409  }
410 
411  return estimator;
412 }
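//
// [Annotation, not part of the original source] Summary of the per-event deviation d
// accumulated by CalculateEstimator() above, with network output v, desired output/target t
// and event weight w:
//
//    regression        : d = sum_tgt ( v_tgt - t_tgt )^2
//    multiclass, kCE   : d = -log( exp(a_cls) / sum_icls exp(a_icls) )   (softmax cross-entropy)
//    two-class, kMSE   : d = ( t - v )^2
//    two-class, kCE    : d = -2 * ( t*log(v) + (1-t)*log(1-v) )
//
// The returned estimator is sum_i( w_i * d_i ) / sum_i( w_i ), optionally restricted to the
// fraction WeightRange of the total event weight with the smallest deviations.
//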
413 
414 ////////////////////////////////////////////////////////////////////////////////
415 
416 void TMVA::MethodMLP::Train(Int_t nEpochs)
417 {
418  if (fNetwork == 0) {
419  //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
420  Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
421  SetAnalysisType(GetAnalysisType());
422  }
423  Log() << kDEBUG << "reinitialize learning rates" << Endl;
424  InitializeLearningRates();
425  PrintMessage("Training Network");
426 
427  Int_t nEvents=GetNEvents();
428  Int_t nSynapses=fSynapses->GetEntriesFast();
429  if (nSynapses>nEvents)
430  Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
431 
432 #ifdef MethodMLP_UseMinuit__
433  if (useMinuit) MinuitMinimize();
434 #else
435  if (fTrainingMethod == kGA) GeneticMinimize();
436  else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
437  else BackPropagationMinimize(nEpochs);
438 #endif
439 
440  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
441  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
442  if (fUseRegulator){
443  Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
444  UpdateRegulators();
445  Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
446  }
447 
448  if( fCalculateErrors || fUseRegulator )
449  {
450  Int_t numSynapses=fSynapses->GetEntriesFast();
451  fInvHessian.ResizeTo(numSynapses,numSynapses);
452  GetApproxInvHessian( fInvHessian ,false);
453  }
454 }
455 
456 ////////////////////////////////////////////////////////////////////////////////
457 /// train network with BFGS algorithm
458 
459 void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
460 {
461  Timer timer( (fSteps>0?100:nEpochs), GetName() );
462 
463  // create histograms for overtraining monitoring
464  Int_t nbinTest = Int_t(nEpochs/fTestRate);
465  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
466  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
467  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
468  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
469 
470  Int_t nSynapses = fSynapses->GetEntriesFast();
471  Int_t nWeights = nSynapses;
472 
473  for (Int_t i=0;i<nSynapses;i++) {
474  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
475  synapse->SetDEDw(0.0);
476  }
477 
478  std::vector<Double_t> buffer( nWeights );
479  for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
480 
481  TMatrixD Dir ( nWeights, 1 );
482  TMatrixD Hessian ( nWeights, nWeights );
483  TMatrixD Gamma ( nWeights, 1 );
484  TMatrixD Delta ( nWeights, 1 );
485  Int_t RegUpdateCD=0; //zjh
486  Int_t RegUpdateTimes=0; //zjh
487  Double_t AccuError=0;
488 
489  Double_t trainE = -1;
490  Double_t testE = -1;
491 
492  fLastAlpha = 0.;
493 
494  if(fSamplingTraining || fSamplingTesting)
495  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
496 
497  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
498  timer.DrawProgressBar( 0 );
499 
500  // start training cycles (epochs)
501  for (Int_t i = 0; i < nEpochs; i++) {
502  if (Float_t(i)/nEpochs < fSamplingEpoch) {
503  if ((i+1)%fTestRate == 0 || (i == 0)) {
504  if (fSamplingTraining) {
505  Data()->SetCurrentType( Types::kTraining );
506  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
507  Data()->CreateSampling();
508  }
509  if (fSamplingTesting) {
510  Data()->SetCurrentType( Types::kTesting );
511  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
512  Data()->CreateSampling();
513  }
514  }
515  }
516  else {
517  Data()->SetCurrentType( Types::kTraining );
518  Data()->InitSampling(1.0,1.0);
519  Data()->SetCurrentType( Types::kTesting );
520  Data()->InitSampling(1.0,1.0);
521  }
522  Data()->SetCurrentType( Types::kTraining );
523 
524  //zjh
525  if (fUseRegulator) {
526  UpdatePriors();
527  RegUpdateCD++;
528  }
529  //zjh
530 
531  SetGammaDelta( Gamma, Delta, buffer );
532 
533  if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
534  SteepestDir( Dir );
535  Hessian.UnitMatrix();
536  RegUpdateCD=0; //zjh
537  }
538  else {
539  if (GetHessian( Hessian, Gamma, Delta )) {
540  SteepestDir( Dir );
541  Hessian.UnitMatrix();
542  RegUpdateCD=0; //zjh
543  }
544  else SetDir( Hessian, Dir );
545  }
546 
547  Double_t dError=0; //zjh
548  if (DerivDir( Dir ) > 0) {
549  SteepestDir( Dir );
550  Hessian.UnitMatrix();
551  RegUpdateCD=0; //zjh
552  }
553  if (LineSearch( Dir, buffer, &dError )) { //zjh
554  Hessian.UnitMatrix();
555  SteepestDir( Dir );
556  RegUpdateCD=0; //zjh
557  if (LineSearch(Dir, buffer, &dError)) { //zjh
558  i = nEpochs;
559  Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
560  }
561  }
562 
563  //zjh+
564  if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
565  AccuError+=dError;
566 
567  if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
568  Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
569  UpdateRegulators();
570  Hessian.UnitMatrix();
571  RegUpdateCD=0;
572  RegUpdateTimes++;
573  AccuError=0;
574  }
575  //zjh-
576 
577  // monitor convergence of training and control sample
578  if ((i+1)%fTestRate == 0) {
579  //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
580  //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
581  trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
582  testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
583  fEstimatorHistTrain->Fill( i+1, trainE );
584  fEstimatorHistTest ->Fill( i+1, testE );
585 
586  Bool_t success = kFALSE;
587  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
588  success = kTRUE;
589  }
590  Data()->EventResult( success );
591 
592  SetCurrentValue( testE );
593  if (HasConverged()) {
594  if (Float_t(i)/nEpochs < fSamplingEpoch) {
595  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
596  i = newEpoch;
597  ResetConvergenceCounter();
598  }
599  else break;
600  }
601  }
602 
603  // draw progress
604  TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
605  if (fSteps > 0) {
606  Float_t progress = 0;
607  if (Float_t(i)/nEpochs < fSamplingEpoch)
608 // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
609  progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
610  else
611  {
612 // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
613  progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
614  }
615  Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
616  if (progress2>progress) progress=progress2; //zjh
617  timer.DrawProgressBar( Int_t(progress), convText );
618  }
619  else {
620  Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
621  if (progress<i) progress=i; //zjh
622  timer.DrawProgressBar( progress, convText ); //zjh
623  }
624 
625  // some verbose output
626  if (fgPRINT_SEQ) {
627  PrintNetwork();
628  WaitForKeyboard();
629  }
630  }
631 }
632 
633 ////////////////////////////////////////////////////////////////////////////////
634 
635 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
636 {
637  Int_t nWeights = fSynapses->GetEntriesFast();
638 
639  Int_t IDX = 0;
640  Int_t nSynapses = fSynapses->GetEntriesFast();
641  for (Int_t i=0;i<nSynapses;i++) {
642  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
643  Gamma[IDX++][0] = -synapse->GetDEDw();
644  }
645 
646  for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
647 
648  ComputeDEDw();
649 
650  IDX = 0;
651  for (Int_t i=0;i<nSynapses;i++)
652  {
653  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
654  Gamma[IDX++][0] += synapse->GetDEDw();
655  }
656 }
657 
658 ////////////////////////////////////////////////////////////////////////////////
659 
660 void TMVA::MethodMLP::ComputeDEDw()
661 {
662  Int_t nSynapses = fSynapses->GetEntriesFast();
663  for (Int_t i=0;i<nSynapses;i++) {
664  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
665  synapse->SetDEDw( 0.0 );
666  }
667 
668  Int_t nEvents = GetNEvents();
669  Int_t nPosEvents = nEvents;
670  for (Int_t i=0;i<nEvents;i++) {
671 
672  const Event* ev = GetEvent(i);
673  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
674  && (Data()->GetCurrentType() == Types::kTraining)){
675  --nPosEvents;
676  continue;
677  }
678 
679  SimulateEvent( ev );
680 
681  for (Int_t j=0;j<nSynapses;j++) {
682  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
683  synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
684  }
685  }
686 
687  for (Int_t i=0;i<nSynapses;i++) {
688  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
689  Double_t DEDw=synapse->GetDEDw(); //zjh
690  if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
691  synapse->SetDEDw( DEDw / nPosEvents ); //zjh
692  }
693 }
694 
695 ////////////////////////////////////////////////////////////////////////////////
696 
697 void TMVA::MethodMLP::SimulateEvent( const Event* ev )
698 {
699  Double_t eventWeight = ev->GetWeight();
700 
701  ForceNetworkInputs( ev );
702  ForceNetworkCalculations();
703 
704  if (DoRegression()) {
705  UInt_t ntgt = DataInfo().GetNTargets();
706  for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
707  Double_t desired = ev->GetTarget(itgt);
708  Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
709  GetOutputNeuron( itgt )->SetError(error);
710  }
711  } else if (DoMulticlass()) {
712  UInt_t nClasses = DataInfo().GetNClasses();
713  UInt_t cls = ev->GetClass();
714  for (UInt_t icls = 0; icls < nClasses; icls++) {
715  Double_t desired = ( cls==icls ? 1.0 : 0.0 );
716  Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
717  GetOutputNeuron( icls )->SetError(error);
718  }
719  } else {
720  Double_t desired = GetDesiredOutput( ev );
721  Double_t error=-1; //zjh
722  if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
723  else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
724  GetOutputNeuron()->SetError(error);
725  }
726 
727  CalculateNeuronDeltas();
728  for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
729  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
730  synapse->InitDelta();
731  synapse->CalculateDelta();
732  }
733 }
734 
735 ////////////////////////////////////////////////////////////////////////////////
736 
737 void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
738 {
739  Int_t IDX = 0;
740  Int_t nSynapses = fSynapses->GetEntriesFast();
741 
742  for (Int_t i=0;i<nSynapses;i++) {
743  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
744  Dir[IDX++][0] = -synapse->GetDEDw();
745  }
746 }
747 
748 ////////////////////////////////////////////////////////////////////////////////
749 
750 Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
751 {
752  TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
753  if ((Double_t) gd[0][0] == 0.) return kTRUE;
754  TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
755  TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
756  TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
757  Double_t a = 1 / (Double_t) gd[0][0];
758  Double_t f = 1 + ((Double_t)gHg[0][0]*a);
759  TMatrixD res( TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed,Delta)) );
760  res *= f;
761  res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
762                                                            TMatrixD(TMatrixD::kTransposed,Delta)));
763  res *= a;
764  Hessian += res;
765 
766  return kFALSE;
767 }
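//
// [Annotation, not part of the original source] With gamma = change of the gradient,
// delta = change of the weights (both column vectors), a = 1/(gamma^T delta) and
// f = 1 + a * gamma^T H gamma, GetHessian() applies the standard BFGS update of the
// approximate inverse Hessian H:
//
//    H  <-  H + a * ( f * delta*delta^T  -  delta*gamma^T*H  -  H*gamma*delta^T )
//
// If gamma^T delta == 0 it returns kTRUE and the caller falls back to the steepest-descent
// direction with H reset to the unit matrix.
//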
768 
769 ////////////////////////////////////////////////////////////////////////////////
770 
771 void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
772 {
773  Int_t IDX = 0;
774  Int_t nSynapses = fSynapses->GetEntriesFast();
775  TMatrixD DEDw(nSynapses, 1);
776 
777  for (Int_t i=0;i<nSynapses;i++) {
778  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
779  DEDw[IDX++][0] = synapse->GetDEDw();
780  }
781 
782  dir = Hessian * DEDw;
783  for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
784 }
785 
786 ////////////////////////////////////////////////////////////////////////////////
787 
788 Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
789 {
790  Int_t IDX = 0;
791  Int_t nSynapses = fSynapses->GetEntriesFast();
792  Double_t Result = 0.0;
793 
794  for (Int_t i=0;i<nSynapses;i++) {
795  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
796  Result += Dir[IDX++][0] * synapse->GetDEDw();
797  }
798  return Result;
799 }
800 
801 ////////////////////////////////////////////////////////////////////////////////
802 
803 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
804 {
805  Int_t IDX = 0;
806  Int_t nSynapses = fSynapses->GetEntriesFast();
807  Int_t nWeights = nSynapses;
808 
809  std::vector<Double_t> Origin(nWeights);
810  for (Int_t i=0;i<nSynapses;i++) {
811  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
812  Origin[i] = synapse->GetWeight();
813  }
814 
815  Double_t err1 = GetError();
816  Double_t errOrigin=err1; //zjh
817  Double_t alpha1 = 0.;
818  Double_t alpha2 = fLastAlpha;
819 
820 
821  if (alpha2 < 0.01) alpha2 = 0.01;
822  else if (alpha2 > 2.0) alpha2 = 2.0;
823  Double_t alpha_original = alpha2;
824  Double_t alpha3 = alpha2;
825 
826  SetDirWeights( Origin, Dir, alpha2 );
827  Double_t err2 = GetError();
828  //Double_t err2 = err1;
829  Double_t err3 = err2;
830  Bool_t bingo = kFALSE;
831 
832 
833  if (err1 > err2) {
834  for (Int_t i=0;i<100;i++) {
835  alpha3 *= fTau;
836  SetDirWeights(Origin, Dir, alpha3);
837  err3 = GetError();
838  if (err3 > err2) {
839  bingo = kTRUE;
840  break;
841  }
842  alpha1 = alpha2;
843  err1 = err2;
844  alpha2 = alpha3;
845  err2 = err3;
846  }
847  if (!bingo) {
848  SetDirWeights(Origin, Dir, 0.);
849  return kTRUE;
850  }
851  }
852  else {
853  for (Int_t i=0;i<100;i++) {
854  alpha2 /= fTau;
855  if (i==50) {
856  Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
857  alpha2 = -alpha_original;
858  }
859  SetDirWeights(Origin, Dir, alpha2);
860  err2 = GetError();
861  if (err1 > err2) {
862  bingo = kTRUE;
863  break;
864  }
865  alpha3 = alpha2;
866  err3 = err2;
867  }
868  if (!bingo) {
869  SetDirWeights(Origin, Dir, 0.);
870  Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
871  fLastAlpha = 0.05;
872  return kTRUE;
873  }
874  }
875 
876  if (alpha1>0 && alpha2>0 && alpha3 > 0) {
877  fLastAlpha = 0.5 * (alpha1 + alpha3 -
878  (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
879  - ( err2 - err1 ) / (alpha2 - alpha1 )));
880  }
881  else {
882  fLastAlpha = alpha2;
883  }
884 
885  fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
886 
887  SetDirWeights(Origin, Dir, fLastAlpha);
888 
889  // leaving these lines uncommented is a heavy price to pay for only a warning message
890  // (which shouldn't appear anyway)
891  // --> about 15% of time is spent in the final GetError().
892  //
893  Double_t finalError = GetError();
894  if (finalError > err1) {
895  Log() << kWARNING << "Line search increased error! Something is wrong."
896  << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
897  << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
898  }
899 
900  for (Int_t i=0;i<nSynapses;i++) {
901  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
902  buffer[IDX] = synapse->GetWeight() - Origin[IDX];
903  IDX++;
904  }
905 
906  if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
907 
908  return kFALSE;
909 }
910 
911 ////////////////////////////////////////////////////////////////////////////////
912 
913 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
914 {
915  Int_t IDX = 0;
916  Int_t nSynapses = fSynapses->GetEntriesFast();
917 
918  for (Int_t i=0;i<nSynapses;i++) {
919  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
920  synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
921  IDX++;
922  }
923  if (fUseRegulator) UpdatePriors(); //zjh
924 }
925 
926 
927 ////////////////////////////////////////////////////////////////////////////////
928 
929 Double_t TMVA::MethodMLP::GetError()
930 {
931  Int_t nEvents = GetNEvents();
932  UInt_t ntgts = GetNTargets();
933  Double_t Result = 0.;
934 
935  for (Int_t i=0;i<nEvents;i++) {
936  const Event* ev = GetEvent(i);
937 
938  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
939  && (Data()->GetCurrentType() == Types::kTraining)){
940  continue;
941  }
942  SimulateEvent( ev );
943 
944  Double_t error = 0.;
945  if (DoRegression()) {
946  for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
947  error += GetMSEErr( ev, itgt ); //zjh
948  }
949  } else if ( DoMulticlass() ){
950  for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
951  error += GetMSEErr( ev, icls );
952  }
953  } else {
954  if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
955  else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
956  }
957  Result += error * ev->GetWeight();
958  }
959  if (fUseRegulator) Result+=fPrior; //zjh
960  if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
961  return Result;
962 }
963 
964 ////////////////////////////////////////////////////////////////////////////////
965 
966 Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
967 {
968  Double_t error = 0;
969  Double_t output = GetOutputNeuron( index )->GetActivationValue();
970  Double_t target = 0;
971  if (DoRegression()) target = ev->GetTarget( index );
972  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
973  else target = GetDesiredOutput( ev );
974 
975  error = 0.5*(output-target)*(output-target); //zjh
976 
977  return error;
978 
979 }
980 
981 ////////////////////////////////////////////////////////////////////////////////
982 
983 Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
984 {
985  Double_t error = 0;
986  Double_t output = GetOutputNeuron( index )->GetActivationValue();
987  Double_t target = 0;
988  if (DoRegression()) target = ev->GetTarget( index );
989  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
990  else target = GetDesiredOutput( ev );
991 
992  error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
993 
994  return error;
995 }
996 
997 ////////////////////////////////////////////////////////////////////////////////
998 /// minimize estimator / train network with backpropagation algorithm
999 
1000 void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
1001 {
1002  // Timer timer( nEpochs, GetName() );
1003  Timer timer( (fSteps>0?100:nEpochs), GetName() );
1004  Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1005 
1006  // create histograms for overtraining monitoring
1007  Int_t nbinTest = Int_t(nEpochs/fTestRate);
1008  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1009  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1010  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1011  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1012 
1013  if(fSamplingTraining || fSamplingTesting)
1014  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1015 
1016  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1017  timer.DrawProgressBar(0);
1018 
1019  // estimators
1020  Double_t trainE = -1;
1021  Double_t testE = -1;
1022 
1023  // start training cycles (epochs)
1024  for (Int_t i = 0; i < nEpochs; i++) {
1025 
1026  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1027  if ((i+1)%fTestRate == 0 || (i == 0)) {
1028  if (fSamplingTraining) {
1029  Data()->SetCurrentType( Types::kTraining );
1030  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1031  Data()->CreateSampling();
1032  }
1033  if (fSamplingTesting) {
1034  Data()->SetCurrentType( Types::kTesting );
1035  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1036  Data()->CreateSampling();
1037  }
1038  }
1039  }
1040  else {
1041  Data()->SetCurrentType( Types::kTraining );
1042  Data()->InitSampling(1.0,1.0);
1043  Data()->SetCurrentType( Types::kTesting );
1044  Data()->InitSampling(1.0,1.0);
1045  }
1046  Data()->SetCurrentType( Types::kTraining );
1047 
1048  TrainOneEpoch();
1049  DecaySynapseWeights(i >= lateEpoch);
1050 
1051  // monitor convergence of training and control sample
1052  if ((i+1)%fTestRate == 0) {
1053  trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1054  testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1055  fEstimatorHistTrain->Fill( i+1, trainE );
1056  fEstimatorHistTest ->Fill( i+1, testE );
1057 
1058  Bool_t success = kFALSE;
1059  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1060  success = kTRUE;
1061  }
1062  Data()->EventResult( success );
1063 
1064  SetCurrentValue( testE );
1065  if (HasConverged()) {
1066  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1067  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1068  i = newEpoch;
1069  ResetConvergenceCounter();
1070  }
1071  else {
1072  if (lateEpoch > i) lateEpoch = i;
1073  else break;
1074  }
1075  }
1076  }
1077 
1078  // draw progress bar (add convergence value)
1079  TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1080  if (fSteps > 0) {
1081  Float_t progress = 0;
1082  if (Float_t(i)/nEpochs < fSamplingEpoch)
1083  progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1084  else
1085  progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1086 
1087  timer.DrawProgressBar( Int_t(progress), convText );
1088  }
1089  else {
1090  timer.DrawProgressBar( i, convText );
1091  }
1092  }
1093 }
1094 
1095 ////////////////////////////////////////////////////////////////////////////////
1096 /// train network over a single epoch/cycle of events
1097 
1098 void TMVA::MethodMLP::TrainOneEpoch()
1099 {
1100  Int_t nEvents = Data()->GetNEvents();
1101 
1102  // randomize the order events will be presented, important for sequential mode
1103  Int_t* index = new Int_t[nEvents];
1104  for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1105  Shuffle(index, nEvents);
1106 
1107  // loop over all training events
1108  for (Int_t i = 0; i < nEvents; i++) {
1109 
1110  const Event * ev = GetEvent(index[i]);
1111  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1112  && (Data()->GetCurrentType() == Types::kTraining)){
1113  continue;
1114  }
1115 
1116  TrainOneEvent(index[i]);
1117 
1118  // do adjustments if in batch mode
1119  if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1120  AdjustSynapseWeights();
1121  if (fgPRINT_BATCH) {
1122  PrintNetwork();
1123  WaitForKeyboard();
1124  }
1125  }
1126 
1127  // debug in sequential mode
1128  if (fgPRINT_SEQ) {
1129  PrintNetwork();
1130  WaitForKeyboard();
1131  }
1132  }
1133 
1134  delete[] index;
1135 }
1136 
1137 ////////////////////////////////////////////////////////////////////////////////
1138 /// Input:
1139 /// index: the array to shuffle
1140 /// n: the size of the array
1141 /// Output:
1142 /// index: the shuffled indexes
1143 /// This method is used for sequential training
1144 
1145 void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
1146 {
1147  Int_t j, k;
1148  Int_t a = n - 1;
1149  for (Int_t i = 0; i < n; i++) {
1150  j = (Int_t) (frgen->Rndm() * a);
1151  if (j<n){ // address the 'worries' of coverity
1152  k = index[j];
1153  index[j] = index[i];
1154  index[i] = k;
1155  }
1156  }
1157 }
1158 
1159 ////////////////////////////////////////////////////////////////////////////////
1160 /// decay synapse weights
1161 /// in last 10 epochs, lower learning rate even more to find a good minimum
1162 
1163 void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
1164 {
1165  TSynapse* synapse;
1166  Int_t numSynapses = fSynapses->GetEntriesFast();
1167  for (Int_t i = 0; i < numSynapses; i++) {
1168  synapse = (TSynapse*)fSynapses->At(i);
1169  if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1170  else synapse->DecayLearningRate(fDecayRate);
1171  }
1172 }
1173 
1174 ////////////////////////////////////////////////////////////////////////////////
1175 /// fast per-event training
1176 
1177 void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
1178 {
1179  GetEvent(ievt);
1180 
1181  // as soon as we know how to get event weights, get that here
1182 
1183  // note: the normalization of event weights will affect the choice
1184  // of learning rate, one will have to experiment to get the right value.
1185  // in general, if the "average" event weight is 1, the learning rate
1186  // should be good if set around 0.02 (a good value if all event weights are 1)
1187  Double_t eventWeight = 1.0;
1188 
1189  // get the desired output of this event
1190  Double_t desired;
1191  if (type == 0) desired = fOutput->GetMin(); // background //zjh
1192  else desired = fOutput->GetMax(); // signal //zjh
1193 
1194  // force the value for each input neuron
1195  Double_t x;
1196  TNeuron* neuron;
1197 
1198  for (UInt_t j = 0; j < GetNvar(); j++) {
1199  x = branchVar[j];
1200  if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1201  neuron = GetInputNeuron(j);
1202  neuron->ForceValue(x);
1203  }
1204 
1205  ForceNetworkCalculations();
1206  UpdateNetwork(desired, eventWeight);
1207 }
1208 
1209 ////////////////////////////////////////////////////////////////////////////////
1210 /// train network over a single event
1211 /// this uses the new event model
1212 
1213 void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
1214 {
1215  // note: the normalization of event weights will affect the choice
1216  // of learning rate, one will have to experiment to get the right value.
1217  // in general, if the "average" event weight is 1, the learning rate
1218  // should be good if set around 0.02 (a good value if all event weights are 1)
1219 
1220  const Event * ev = GetEvent(ievt);
1221  Double_t eventWeight = ev->GetWeight();
1222  ForceNetworkInputs( ev );
1223  ForceNetworkCalculations();
1224  if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1225  if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1226  else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1227 }
1228 
1229 ////////////////////////////////////////////////////////////////////////////////
1230 /// get the desired output of this event
1231 
1232 Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
1233 {
1234  return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1235 }
1236 
1237 
1238 ////////////////////////////////////////////////////////////////////////////////
1239 /// update the network based on how closely
1240 /// the output matched the desired output
1241 
1242 void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
1243 {
1244  Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1245  if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1246  else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1247  else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1248  error *= eventWeight;
1249  GetOutputNeuron()->SetError(error);
1250  CalculateNeuronDeltas();
1251  UpdateSynapses();
1252 }
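//
// [Annotation, not part of the original source] The "error" assigned to the output
// neuron above is dE/dv for the chosen estimator, with v the activation value and
// t the desired output, scaled by the event weight before back-propagation:
//
//    kMSE : E = (v-t)^2 / 2                      ->  dE/dv = v - t
//    kCE  : E = -( t*log(v) + (1-t)*log(1-v) )   ->  dE/dv = -1/(v-1+t)   for t in {0,1}
//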
1253 
1254 ////////////////////////////////////////////////////////////////////////////////
1255 /// update the network based on how closely
1256 /// the output matched the desired output
1257 
1258 void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1259 {
1260  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1261  Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i);
1262  error *= eventWeight;
1263  GetOutputNeuron( i )->SetError(error);
1264  }
1265  CalculateNeuronDeltas();
1266  UpdateSynapses();
1267 }
1268 
1269 
1270 ////////////////////////////////////////////////////////////////////////////////
1271 /// have each neuron calculate its delta by backpropagation
1272 
1273 void TMVA::MethodMLP::CalculateNeuronDeltas()
1274 {
1275  TNeuron* neuron;
1276  Int_t numNeurons;
1277  Int_t numLayers = fNetwork->GetEntriesFast();
1278  TObjArray* curLayer;
1279 
1280  // step backwards through the network (backpropagation)
1281  // deltas calculated starting at output layer
1282  for (Int_t i = numLayers-1; i >= 0; i--) {
1283  curLayer = (TObjArray*)fNetwork->At(i);
1284  numNeurons = curLayer->GetEntriesFast();
1285 
1286  for (Int_t j = 0; j < numNeurons; j++) {
1287  neuron = (TNeuron*) curLayer->At(j);
1288  neuron->CalculateDelta();
1289  }
1290  }
1291 }
1292 
1293 ////////////////////////////////////////////////////////////////////////////////
1294 /// create genetics class similar to GeneticCut
1295 /// give it vector of parameter ranges (parameters = weights)
1296 /// link fitness function of this class to ComputeEstimator
1297 /// instantiate GA (see MethodCuts)
1298 /// run it
1299 /// then this should exist for GA, Minuit and random sampling
1300 
1301 void TMVA::MethodMLP::GeneticMinimize()
1302 {
1303  PrintMessage("Minimizing Estimator with GA");
1304 
1305  // define GA parameters
1306  fGA_preCalc = 1;
1307  fGA_SC_steps = 10;
1308  fGA_SC_rate = 5;
1309  fGA_SC_factor = 0.95;
1310  fGA_nsteps = 30;
1311 
1312  // ranges
1313  std::vector<Interval*> ranges;
1314 
1315  Int_t numWeights = fSynapses->GetEntriesFast();
1316  for (Int_t ivar=0; ivar< numWeights; ivar++) {
1317  ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1318  }
1319 
1320  FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1321  gf->Run();
1322 
1323  Double_t estimator = CalculateEstimator();
1324  Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1325 }
1326 
1327 ////////////////////////////////////////////////////////////////////////////////
1328 /// interface to the estimate
1329 
1330 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1331 {
1332  return ComputeEstimator( parameters );
1333 }
1334 
1335 ////////////////////////////////////////////////////////////////////////////////
1336 /// this function is called by GeneticANN for GA optimization
1337 
1338 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1339 {
1340  TSynapse* synapse;
1341  Int_t numSynapses = fSynapses->GetEntriesFast();
1342 
1343  for (Int_t i = 0; i < numSynapses; i++) {
1344  synapse = (TSynapse*)fSynapses->At(i);
1345  synapse->SetWeight(parameters.at(i));
1346  }
1347  if (fUseRegulator) UpdatePriors(); //zjh
1348 
1349  Double_t estimator = CalculateEstimator();
1350 
1351  return estimator;
1352 }
1353 
1354 ////////////////////////////////////////////////////////////////////////////////
1355 /// update synapse error fields and adjust the weights (if in sequential mode)
1356 
1357 void TMVA::MethodMLP::UpdateSynapses()
1358 {
1359  TNeuron* neuron;
1360  Int_t numNeurons;
1361  TObjArray* curLayer;
1362  Int_t numLayers = fNetwork->GetEntriesFast();
1363 
1364  for (Int_t i = 0; i < numLayers; i++) {
1365  curLayer = (TObjArray*)fNetwork->At(i);
1366  numNeurons = curLayer->GetEntriesFast();
1367 
1368  for (Int_t j = 0; j < numNeurons; j++) {
1369  neuron = (TNeuron*) curLayer->At(j);
1370  if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1371  else neuron->UpdateSynapsesSequential();
1372  }
1373  }
1374 }
1375 
1376 ////////////////////////////////////////////////////////////////////////////////
1377 /// just adjust the synapse weights (should be called in batch mode)
1378 
1379 void TMVA::MethodMLP::AdjustSynapseWeights()
1380 {
1381  TNeuron* neuron;
1382  Int_t numNeurons;
1383  TObjArray* curLayer;
1384  Int_t numLayers = fNetwork->GetEntriesFast();
1385 
1386  for (Int_t i = numLayers-1; i >= 0; i--) {
1387  curLayer = (TObjArray*)fNetwork->At(i);
1388  numNeurons = curLayer->GetEntriesFast();
1389 
1390  for (Int_t j = 0; j < numNeurons; j++) {
1391  neuron = (TNeuron*) curLayer->At(j);
1392  neuron->AdjustSynapseWeights();
1393  }
1394  }
1395 }
1396 
1397 ////////////////////////////////////////////////////////////////////////////////
1398 
1399 void TMVA::MethodMLP::UpdatePriors()
1400 {
1401  fPrior=0;
1402  fPriorDev.clear();
1403  Int_t nSynapses = fSynapses->GetEntriesFast();
1404  for (Int_t i=0;i<nSynapses;i++) {
1405  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1406  fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1407  fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1408  }
1409 }
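//
// [Annotation, not part of the original source] The regulator prior computed above is a
// per-synapse weight-decay term, fPrior = sum_i alpha_i * w_i^2 / 2 with
// alpha_i = fRegulators[fRegulatorIdx[i]]; its gradient alpha_i * w_i (fPriorDev) is added
// to dE/dw in ComputeDEDw() when UseRegulator is set.
//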
1410 
1411 ////////////////////////////////////////////////////////////////////////////////
1412 
1413 void TMVA::MethodMLP::UpdateRegulators()
1414 {
1415  TMatrixD InvH(0,0);
1416  GetApproxInvHessian(InvH);
1417  Int_t numSynapses=fSynapses->GetEntriesFast();
1418  Int_t numRegulators=fRegulators.size();
1419  Float_t gamma=0,
1420  variance=1.; // Gaussian noise
1421  std::vector<Int_t> nWDP(numRegulators);
1422  std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1423  for (int i=0;i<numSynapses;i++) {
1424  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1425  Int_t idx=fRegulatorIdx[i];
1426  nWDP[idx]++;
1427  trace[idx]+=InvH[i][i];
1428  gamma+=1-fRegulators[idx]*InvH[i][i];
1429  weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1430  }
1431  if (fEstimator==kMSE) {
1432  if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1433  else variance=CalculateEstimator( Types::kTraining, 0 );
1434  }
1435 
1436  //Log() << kDEBUG << Endl;
1437  for (int i=0;i<numRegulators;i++)
1438  {
1439  //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1440  fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1441  if (fRegulators[i]<0) fRegulators[i]=0;
1442  Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1443  }
1444  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1445  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1446 
1447  Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1448 
1449 }
1450 
1451 ////////////////////////////////////////////////////////////////////////////////
1452 
1453 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1454 {
1455  Int_t numSynapses=fSynapses->GetEntriesFast();
1456  InvHessian.ResizeTo( numSynapses, numSynapses );
1457  InvHessian=0;
1458  TMatrixD sens(numSynapses,1);
1459  TMatrixD sensT(1,numSynapses);
1460  Int_t nEvents = GetNEvents();
1461  for (Int_t i=0;i<nEvents;i++) {
1462  GetEvent(i);
1463  double outputValue=GetMvaValue(); // force calculation
1464  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1465  CalculateNeuronDeltas();
1466  for (Int_t j = 0; j < numSynapses; j++){
1467  TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1468  synapses->InitDelta();
1469  synapses->CalculateDelta();
1470  sens[j][0]=sensT[0][j]=synapses->GetDelta();
1471  }
1472  if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1473  else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1474  }
1475 
1476  // TVectorD eValue(numSynapses);
1477  if (regulate) {
1478  for (Int_t i = 0; i < numSynapses; i++){
1479  InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1480  }
1481  }
1482  else {
1483  for (Int_t i = 0; i < numSynapses; i++){
1484  InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1485  }
1486  }
1487 
1488  InvHessian.Invert();
1489 
1490 }
1491 
1492 ////////////////////////////////////////////////////////////////////////////////
1493 
1494 Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
1495 {
1496  Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1497 
1498  // no Hessian (old training file) or no error requested
1499  if (!fCalculateErrors || errLower==0 || errUpper==0)
1500  return MvaValue;
1501 
1502  Double_t MvaUpper,MvaLower,median,variance;
1503  Int_t numSynapses=fSynapses->GetEntriesFast();
1504  if (fInvHessian.GetNcols()!=numSynapses) {
1505  Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1506  }
1507  TMatrixD sens(numSynapses,1);
1508  TMatrixD sensT(1,numSynapses);
1509  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1510  //GetOutputNeuron()->SetError(1.);
1511  CalculateNeuronDeltas();
1512  for (Int_t i = 0; i < numSynapses; i++){
1513  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1514  synapses->InitDelta();
1515  synapses->CalculateDelta();
1516  sensT[0][i]=synapses->GetDelta();
1517  }
1518  sens.Transpose(sensT);
1519  TMatrixD sig=sensT*fInvHessian*sens;
1520  variance=sig[0][0];
1521  median=GetOutputNeuron()->GetValue();
1522 
1523  if (variance<0) {
1524  Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1525  variance=0;
1526  }
1527  variance=sqrt(variance);
1528 
1529  //upper
1530  MvaUpper=fOutput->Eval(median+variance);
1531  if(errUpper)
1532  *errUpper=MvaUpper-MvaValue;
1533 
1534  //lower
1535  MvaLower=fOutput->Eval(median-variance);
1536  if(errLower)
1537  *errLower=MvaValue-MvaLower;
1538 
1539  return MvaValue;
1540 }
1541 
1542 
1543 #ifdef MethodMLP_UseMinuit__
1544 
1545 ////////////////////////////////////////////////////////////////////////////////
1546 /// minimize using Minuit
1547 
1548 void TMVA::MethodMLP::MinuitMinimize()
1549 {
1550  fNumberOfWeights = fSynapses->GetEntriesFast();
1551 
1552  TFitter* tfitter = new TFitter( fNumberOfWeights );
1553 
1554  // minuit-specific settings
1555  Double_t args[10];
1556 
1557  // output level
1558  args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1559  tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1560  tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1561 
1562  double w[54];
1563 
1564  // init parameters
1565  for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1566  TString parName = Form("w%i", ipar);
1567  tfitter->SetParameter( ipar,
1568  parName, w[ipar], 0.1, 0, 0 );
1569  }
1570 
1571  // define the FCN function
1572  tfitter->SetFCN( &IFCN );
1573 
1574  // define fit strategy
1575  args[0] = 2;
1576  tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1577 
1578  // now do the fit !
1579  args[0] = 1e-04;
1580  tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1581 
1582  Bool_t doBetter = kFALSE;
1583  Bool_t doEvenBetter = kFALSE;
1584  if (doBetter) {
1585  args[0] = 1e-04;
1586  tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1587 
1588  if (doEvenBetter) {
1589  args[0] = 500;
1590  tfitter->ExecuteCommand( "MINOS", args, 1 );
1591  }
1592  }
1593 }
1594 
1595 //_____________________________________________________________________________
1596 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1597 {
1598  // Evaluate the minimisation function ----------------------------------------------------
1599  //
1600  // Input parameters:
1601  // npars: number of currently variable parameters
1602  // CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1603  // fitPars: array of (constant and variable) parameters
1604  // iflag: indicates what is to be calculated (see example below)
1605  // grad: array of gradients
1606  //
1607  // Output parameters:
1608  // f: the calculated function value.
1609  // grad: the (optional) vector of first derivatives.
1610  // ---------------------------------------------------------------------------------------
1611  ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1612 }
1613 
1614 TTHREAD_TLS(Int_t) nc = 0;
1615 TTHREAD_TLS(double) minf = 1000000;
1616 
1617 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1618 {
1619  // first update the weights
1620  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1621  TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1622  synapse->SetWeight(fitPars[ipar]);
1623  }
1624 
1625  // now compute the estimator
1626  f = CalculateEstimator();
1627 
1628  nc++;
1629  if (f < minf) minf = f;
1630  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1631  Log() << kDEBUG << Endl;
1632  Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1633 }
1634 
1635 ////////////////////////////////////////////////////////////////////////////////
1636 /// global "this" pointer to be used in minuit
1637 
1638 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1639 {
1640  return fgThis;
1641 }
1642 
1643 #endif
1644 
1645 
1646 ////////////////////////////////////////////////////////////////////////////////
1647 /// write specific classifier response
1648 
1649 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1650 {
1651  MethodANNBase::MakeClassSpecific(fout, className);
1652 }
1653 
1654 ////////////////////////////////////////////////////////////////////////////////
1655 /// get help message text
1656 ///
1657 /// typical length of text line:
1658 /// "|--------------------------------------------------------------|"
1659 
1660 void TMVA::MethodMLP::GetHelpMessage() const
1661 {
1662  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1663  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1664 
1665  Log() << Endl;
1666  Log() << col << "--- Short description:" << colres << Endl;
1667  Log() << Endl;
1668  Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1669  Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1670  Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1671  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1672  Log() << "signal and one background). " << Endl;
1673  Log() << Endl;
1674  Log() << col << "--- Performance optimisation:" << colres << Endl;
1675  Log() << Endl;
1676  Log() << "Neural networks are stable and perform well for a large variety of " << Endl;
1677  Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1678  Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1679  Log() << "number of input variables that have only little discrimination power. " << Endl;
1680  Log() << "" << Endl;
1681  Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1682  Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1683  Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1684  Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1685  Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1686  Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1687  Log() << "competitive results)." << Endl;
1688  Log() << Endl;
1689  Log() << col << "Overtraining: " << colres
1690  << "only the TMlpANN performs an explicit separation of the" << Endl;
1691  Log() << "full training sample into independent training and validation samples." << Endl;
1692  Log() << "We have found that in most high-energy physics applications the " << Endl;
1693  Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1694  Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1695  Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1696  Log() << "the use of validation samples would only reduce the available training" << Endl;
1697  Log() << "information. However, if the performance on the training sample is " << Endl;
1698  Log() << "found to be significantly better than the one found with the inde-" << Endl;
1699  Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1700  Log() << "are printed to standard output at the end of each training job." << Endl;
1701  Log() << Endl;
1702  Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1703  Log() << Endl;
1704  Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1705  Log() << "\"HiddenLayers=N+1,N,...\", where the first hidden layer has N+1" << Endl;
1706  Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1707  Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1708  Log() << "in favour of more neurons in the first hidden layer." << Endl;
1709  Log() << "" << Endl;
1710  Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1711  Log() << "adjustable weights is small compared to the training sample size," << Endl;
1712  Log() << "using a large number of training samples should not lead to overtraining." << Endl;
1713 }
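
// --- Illustrative sketch (not part of MethodMLP.cxx): booking an MLP with the
// options discussed in the help text above, assuming a TMVA::Factory that has
// been configured as in the standard TMVAClassification tutorial. The option
// values are examples only.
#include "TMVA/Factory.h"
#include "TMVA/Types.h"

void BookExampleMLP( TMVA::Factory* factory )
{
   // two hidden layers with N+1 and N neurons (N = number of input variables),
   // more than 500 training cycles, BFGS training as implemented in this class
   factory->BookMethod( TMVA::Types::kMLP, "MLP",
                        "!H:!V:NeuronType=tanh:VarTransform=N:"
                        "NCycles=600:HiddenLayers=N+1,N:TrainingMethod=BFGS:TestRate=5" );
}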
1714 