MethodMLP.cxx
1// @(#)root/tmva $Id$
2// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodMLP *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * ANN Multilayer Perceptron class for the discrimination of signal *
12 * from background. BFGS implementation based on TMultiLayerPerceptron *
13 * class from ROOT (http://root.cern.ch). *
14 * *
15 * Authors (alphabetical): *
16 * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19 * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20 * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21 * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22 * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25 * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26 * *
27 * Copyright (c) 2005-2011: *
28 * CERN, Switzerland *
29 * U. of Victoria, Canada *
30 * MPI-K Heidelberg, Germany *
31 * U. of Bonn, Germany *
32 * *
33 * Redistribution and use in source and binary forms, with or without *
34 * modification, are permitted according to the terms listed in LICENSE *
35 * (http://tmva.sourceforge.net/LICENSE) *
36 **********************************************************************************/
37
38/*! \class TMVA::MethodMLP
39\ingroup TMVA
40
41Multilayer perceptron class, built on top of MethodANNBase.
42
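Typical use is through the TMVA Factory. The snippet below is only an
illustrative sketch (the file name, data loader and option values are
placeholders, not defaults taken from this class):

~~~ {.cpp}
TFile* outputFile = TFile::Open("TMVA.root", "RECREATE");
TMVA::Factory factory("TMVAClassification", outputFile, "AnalysisType=Classification");
TMVA::DataLoader* dataloader = new TMVA::DataLoader("dataset");
// ... declare input variables and attach signal/background trees to the dataloader ...
factory.BookMethod(dataloader, TMVA::Types::kMLP, "MLP",
                   "HiddenLayers=N+5:NCycles=600:TrainingMethod=BP:"
                   "LearningRate=0.02:DecayRate=0.01:TestRate=10:BPMode=sequential");
~~~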
43*/
44
45#include "TMVA/MethodMLP.h"
46
47#include "TMVA/Config.h"
48#include "TMVA/Configurable.h"
51#include "TMVA/DataSet.h"
52#include "TMVA/DataSetInfo.h"
53#include "TMVA/FitterBase.h"
54#include "TMVA/GeneticFitter.h"
55#include "TMVA/IFitterTarget.h"
56#include "TMVA/IMethod.h"
57#include "TMVA/Interval.h"
58#include "TMVA/MethodANNBase.h"
59#include "TMVA/MsgLogger.h"
60#include "TMVA/TNeuron.h"
61#include "TMVA/TSynapse.h"
62#include "TMVA/Timer.h"
63#include "TMVA/Tools.h"
64#include "TMVA/Types.h"
65
66#include "TH1.h"
67#include "TString.h"
68#include "TTree.h"
69#include "Riostream.h"
70#include "TFitter.h"
71#include "TMatrixD.h"
72#include "TMath.h"
73#include "TFile.h"
74
75#include <cmath>
76#include <vector>
77
78#ifdef MethodMLP_UseMinuit__
79TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
80Bool_t MethodMLP_UseMinuit = kTRUE;
81#endif
82
83REGISTER_METHOD(MLP)
84
85ClassImp(TMVA::MethodMLP);
86
87 using std::vector;
88
89////////////////////////////////////////////////////////////////////////////////
90/// standard constructor
91
92TMVA::MethodMLP::MethodMLP( const TString& jobName,
93 const TString& methodTitle,
94 DataSetInfo& theData,
95 const TString& theOption)
96 : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
97 fUseRegulator(false), fCalculateErrors(false),
98 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
99 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
100 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
101 fSamplingTraining(false), fSamplingTesting(false),
102 fLastAlpha(0.0), fTau(0.),
103 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
104 fBPMode(kSequential), fBpModeS("None"),
105 fBatchSize(0), fTestRate(0), fEpochMon(false),
106 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
107 fGA_SC_rate(0), fGA_SC_factor(0.0),
108 fDeviationsFromTargets(0),
109 fWeightRange (1.0)
110{
111
112}
113
114////////////////////////////////////////////////////////////////////////////////
115/// constructor from a weight file
116
117TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
118 const TString& theWeightFile)
119 : MethodANNBase( Types::kMLP, theData, theWeightFile),
120 fUseRegulator(false), fCalculateErrors(false),
121 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
122 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
123 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
124 fSamplingTraining(false), fSamplingTesting(false),
125 fLastAlpha(0.0), fTau(0.),
126 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
127 fBPMode(kSequential), fBpModeS("None"),
128 fBatchSize(0), fTestRate(0), fEpochMon(false),
129 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
130 fGA_SC_rate(0), fGA_SC_factor(0.0),
131 fDeviationsFromTargets(0),
132 fWeightRange (1.0)
133{
134}
135
136////////////////////////////////////////////////////////////////////////////////
137/// destructor
138/// nothing to be done
139
140TMVA::MethodMLP::~MethodMLP()
141{
142}
143
144void TMVA::MethodMLP::Train()
145{
146 Train(NumCycles());
147}
148
149
150
151////////////////////////////////////////////////////////////////////////////////
152/// MLP can handle classification with 2 classes, multiclass classification, and regression with one regression-target
153
154Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
155{
156 if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
157 if (type == Types::kMulticlass ) return kTRUE;
158 if (type == Types::kRegression ) return kTRUE;
159
160 return kFALSE;
161}
162
163////////////////////////////////////////////////////////////////////////////////
164/// default initializations
165
166void TMVA::MethodMLP::Init()
167{
168 // the minimum requirement to declare an event signal-like
169 SetSignalReferenceCut( 0.5 );
170#ifdef MethodMLP_UseMinuit__
171 fgThis = this;
172#endif
173}
174
175////////////////////////////////////////////////////////////////////////////////
176/// define the options (their key words) that can be set in the option string
177///
178/// known options:
179///
180/// - TrainingMethod <string> Training method
181/// available values are:
182/// - BP Back-Propagation <default>
183/// - GA Genetic Algorithm (takes a LONG time)
184///
185/// - LearningRate <float> NN learning rate parameter
186/// - DecayRate <float> Decay rate for learning parameter
187/// - TestRate <int> Test for overtraining performed at each #th epoch
188///
189/// - BPMode <string> Back-propagation learning mode
190/// available values are:
191/// - sequential <default>
192/// - batch
193///
194/// - BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
195/// - -1 for BatchSize=number_of_events
196
197void TMVA::MethodMLP::DeclareOptions()
198{
199 DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
200 "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
201 AddPreDefVal(TString("BP"));
202 AddPreDefVal(TString("GA"));
203 AddPreDefVal(TString("BFGS"));
204
205 DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
206 DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
207 DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
208 DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
209
210 DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
211 DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
212 DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance"," The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
213
214 DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
215 DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
216
217 DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
218 DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
219
220 DeclareOptionRef(fBpModeS="sequential", "BPMode",
221 "Back-propagation learning mode: sequential or batch");
222 AddPreDefVal(TString("sequential"));
223 AddPreDefVal(TString("batch"));
224
225 DeclareOptionRef(fBatchSize=-1, "BatchSize",
226 "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
227
228 DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
229 "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
230
231 DeclareOptionRef(fSteps=-1, "ConvergenceTests",
232 "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
233
234 DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
235 "Use regulator to avoid over-training"); //zjh
236 DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
237 "Maximum times of regulator update"); //zjh
238 DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
239 "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
240
241 DeclareOptionRef(fWeightRange=1.0, "WeightRange",
242 "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
243
244}
245
246////////////////////////////////////////////////////////////////////////////////
247/// process user options
248
249void TMVA::MethodMLP::ProcessOptions()
250{
251 MethodANNBase::ProcessOptions();
252
253
254 if (IgnoreEventsWithNegWeightsInTraining()) {
255 Log() << kINFO
256 << "Will ignore negative events in training!"
257 << Endl;
258 }
259
260
261 if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
262 else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
263 else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
264
265 if (fBpModeS == "sequential") fBPMode = kSequential;
266 else if (fBpModeS == "batch") fBPMode = kBatch;
267
268 // InitializeLearningRates();
269
270 if (fBPMode == kBatch) {
271 Data()->SetCurrentType(Types::kTraining);
272 Int_t numEvents = Data()->GetNEvents();
273 if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
274 }
275}
276
277////////////////////////////////////////////////////////////////////////////////
278/// initialize learning rates of synapses, used only by back propagation
279
280void TMVA::MethodMLP::InitializeLearningRates()
281{
282 Log() << kDEBUG << "Initialize learning rates" << Endl;
283 TSynapse *synapse;
284 Int_t numSynapses = fSynapses->GetEntriesFast();
285 for (Int_t i = 0; i < numSynapses; i++) {
286 synapse = (TSynapse*)fSynapses->At(i);
287 synapse->SetLearningRate(fLearnRate);
288 }
289}
290
291////////////////////////////////////////////////////////////////////////////////
292/// calculate the estimator that training is attempting to minimize
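/// For two-class classification the per-event contribution d depends on fEstimator:
///   MSE:  d = (t - y)^2                      with target t = 1 (signal), 0 (background)
///   CE:   d = -2 [ t*ln(y) + (1-t)*ln(1-y) ]
/// For regression d is the squared deviation summed over the targets. The value
/// returned is the weighted mean  E = sum_i w_i d_i / sum_i w_i, optionally
/// restricted to the fraction fWeightRange of events with the smallest deviations.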
293
294Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
295{
296 // sanity check
297 if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
298 Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
299 }
300
301 Types::ETreeType saveType = Data()->GetCurrentType();
302 Data()->SetCurrentType(treeType);
303
304 // if epochs are counted create monitoring histograms (only available for classification)
305 TString type = (treeType == Types::kTraining ? "train" : "test");
306 TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
307 TString nameB = name + "_B";
308 TString nameS = name + "_S";
309 Int_t nbin = 100;
310 Float_t limit = 2;
311 TH1* histS = 0;
312 TH1* histB = 0;
313 if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
314 histS = new TH1F( nameS, nameS, nbin, -limit, limit );
315 histB = new TH1F( nameB, nameB, nbin, -limit, limit );
316 }
317
318 Double_t estimator = 0;
319
320 // loop over all training events
321 Int_t nEvents = GetNEvents();
322 UInt_t nClasses = DataInfo().GetNClasses();
323 UInt_t nTgts = DataInfo().GetNTargets();
324
325
326 Float_t sumOfWeights = 0.f;
327 if( fWeightRange < 1.f ){
328 fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
329 }
330
331 for (Int_t i = 0; i < nEvents; i++) {
332
333 const Event* ev = GetEvent(i);
334
335 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
336 && (saveType == Types::kTraining)){
337 continue;
338 }
339
340 Double_t w = ev->GetWeight();
341
342 ForceNetworkInputs( ev );
343 ForceNetworkCalculations();
344
345 Double_t d = 0, v = 0;
346 if (DoRegression()) {
347 for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
348 v = GetOutputNeuron( itgt )->GetActivationValue();
349 Double_t targetValue = ev->GetTarget( itgt );
350 Double_t dt = v - targetValue;
351 d += (dt*dt);
352 }
353 estimator += d*w;
354 } else if (DoMulticlass() ) {
355 UInt_t cls = ev->GetClass();
356 if (fEstimator==kCE){
357 Double_t norm(0);
358 for (UInt_t icls = 0; icls < nClasses; icls++) {
359 Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
360 norm += exp( activationValue );
361 if(icls==cls)
362 d = exp( activationValue );
363 }
364 d = -TMath::Log(d/norm);
365 }
366 else{
367 for (UInt_t icls = 0; icls < nClasses; icls++) {
368 Double_t desired = (icls==cls) ? 1.0 : 0.0;
369 v = GetOutputNeuron( icls )->GetActivationValue();
370 d = (desired-v)*(desired-v);
371 }
372 }
373 estimator += d*w; //zjh
374 } else {
375 Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
376 v = GetOutputNeuron()->GetActivationValue();
377 if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
378 else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
379 estimator += d*w; //zjh
380 }
381
382 if( fDeviationsFromTargets )
383 fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
384
385 sumOfWeights += w;
386
387
388 // fill monitoring histograms
389 if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
390 else if (histB != 0) histB->Fill( float(v), float(w) );
391 }
392
393
394 if( fDeviationsFromTargets ) {
395 std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
396
397 Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
398 estimator = 0.f;
399
400 Float_t weightRangeCut = fWeightRange*sumOfWeights;
401 Float_t weightSum = 0.f;
402 for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
403 float deviation = (*itDev).first;
404 float devWeight = (*itDev).second;
405 weightSum += devWeight; // add the weight of this event
406 if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
407 estimator += devWeight*deviation;
408 }
409 }
410
411 sumOfWeights = sumOfWeightsInRange;
412 delete fDeviationsFromTargets;
413 }
414
415 if (histS != 0) fEpochMonHistS.push_back( histS );
416 if (histB != 0) fEpochMonHistB.push_back( histB );
417
418 //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
419 //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
420 //else estimator = estimator*0.5/Float_t(nEvents);
421 estimator = estimator/Float_t(sumOfWeights);
422
423
424 //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
425
426 Data()->SetCurrentType( saveType );
427
428 // provide epoch-wise monitoring
429 if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
430 CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
431 }
432
433 return estimator;
434}
435
436////////////////////////////////////////////////////////////////////////////////
437
438void TMVA::MethodMLP::Train(Int_t nEpochs)
439{
440 if (fNetwork == 0) {
441 //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
442 Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
443 SetAnalysisType(GetAnalysisType());
444 }
445 Log() << kDEBUG << "reinitialize learning rates" << Endl;
446 InitializeLearningRates();
447 Log() << kHEADER;
448 PrintMessage("Training Network");
449 Log() << Endl;
450 Int_t nEvents=GetNEvents();
451 Int_t nSynapses=fSynapses->GetEntriesFast();
452 if (nSynapses>nEvents)
453 Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
454
455 fIPyMaxIter = nEpochs;
456 if (fInteractive && fInteractive->NotInitialized()){
457 std::vector<TString> titles = {"Error on training set", "Error on test set"};
458 fInteractive->Init(titles);
459 }
460
461#ifdef MethodMLP_UseMinuit__
462 if (useMinuit) MinuitMinimize();
463#else
464 if (fTrainingMethod == kGA) GeneticMinimize();
465 else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
466 else BackPropagationMinimize(nEpochs);
467#endif
468
469 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
470 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
471 if (fUseRegulator){
472 Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
473 UpdateRegulators();
474 Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
475 }
476
477 if( fCalculateErrors || fUseRegulator )
478 {
479 Int_t numSynapses=fSynapses->GetEntriesFast();
480 fInvHessian.ResizeTo(numSynapses,numSynapses);
481 GetApproxInvHessian( fInvHessian ,false);
482 }
483 ExitFromTraining();
484}
485
486////////////////////////////////////////////////////////////////////////////////
487/// train network with BFGS algorithm
488
489void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
490{
491 Timer timer( (fSteps>0?100:nEpochs), GetName() );
492
493 // create histograms for overtraining monitoring
494 Int_t nbinTest = Int_t(nEpochs/fTestRate);
495 if(!IsSilentFile())
496 {
497 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
498 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
499 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
500 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
501 }
502
503 Int_t nSynapses = fSynapses->GetEntriesFast();
504 Int_t nWeights = nSynapses;
505
506 for (Int_t i=0;i<nSynapses;i++) {
507 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
508 synapse->SetDEDw(0.0);
509 }
510
511 std::vector<Double_t> buffer( nWeights );
512 for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
513
514 TMatrixD Dir ( nWeights, 1 );
515 TMatrixD Hessian ( nWeights, nWeights );
516 TMatrixD Gamma ( nWeights, 1 );
517 TMatrixD Delta ( nWeights, 1 );
518 Int_t RegUpdateCD=0; //zjh
519 Int_t RegUpdateTimes=0; //zjh
520 Double_t AccuError=0;
521
522 Double_t trainE = -1;
523 Double_t testE = -1;
524
525 fLastAlpha = 0.;
526
527 if(fSamplingTraining || fSamplingTesting)
528 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
529
530 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
531 timer.DrawProgressBar( 0 );
532
533 // start training cycles (epochs)
534 for (Int_t i = 0; i < nEpochs; i++) {
535
536 if (fExitFromTraining) break;
537 fIPyCurrentIter = i;
538 if (Float_t(i)/nEpochs < fSamplingEpoch) {
539 if ((i+1)%fTestRate == 0 || (i == 0)) {
540 if (fSamplingTraining) {
541 Data()->SetCurrentType( Types::kTraining );
542 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
543 Data()->CreateSampling();
544 }
545 if (fSamplingTesting) {
546 Data()->SetCurrentType( Types::kTesting );
547 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
548 Data()->CreateSampling();
549 }
550 }
551 }
552 else {
553 Data()->SetCurrentType( Types::kTraining );
554 Data()->InitSampling(1.0,1.0);
555 Data()->SetCurrentType( Types::kTesting );
556 Data()->InitSampling(1.0,1.0);
557 }
558 Data()->SetCurrentType( Types::kTraining );
559
560 //zjh
561 if (fUseRegulator) {
562 UpdatePriors();
563 RegUpdateCD++;
564 }
565 //zjh
566
567 SetGammaDelta( Gamma, Delta, buffer );
568
569 if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
570 SteepestDir( Dir );
571 Hessian.UnitMatrix();
572 RegUpdateCD=0; //zjh
573 }
574 else {
575 if (GetHessian( Hessian, Gamma, Delta )) {
576 SteepestDir( Dir );
577 Hessian.UnitMatrix();
578 RegUpdateCD=0; //zjh
579 }
580 else SetDir( Hessian, Dir );
581 }
582
583 Double_t dError=0; //zjh
584 if (DerivDir( Dir ) > 0) {
585 SteepestDir( Dir );
586 Hessian.UnitMatrix();
587 RegUpdateCD=0; //zjh
588 }
589 if (LineSearch( Dir, buffer, &dError )) { //zjh
590 Hessian.UnitMatrix();
591 SteepestDir( Dir );
592 RegUpdateCD=0; //zjh
593 if (LineSearch(Dir, buffer, &dError)) { //zjh
594 i = nEpochs;
595 Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
596 }
597 }
598
599 //zjh+
600 if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
601 AccuError+=dError;
602
603 if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
604 Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
605 UpdateRegulators();
606 Hessian.UnitMatrix();
607 RegUpdateCD=0;
608 RegUpdateTimes++;
609 AccuError=0;
610 }
611 //zjh-
612
613 // monitor convergence of training and control sample
614 if ((i+1)%fTestRate == 0) {
615 //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
616 //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
617 trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
618 testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
619 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
620 if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
621 {
622 fEstimatorHistTrain->Fill( i+1, trainE );
623 fEstimatorHistTest ->Fill( i+1, testE );
624 }
625 Bool_t success = kFALSE;
626 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
627 success = kTRUE;
628 }
629 Data()->EventResult( success );
630
631 SetCurrentValue( testE );
632 if (HasConverged()) {
633 if (Float_t(i)/nEpochs < fSamplingEpoch) {
634 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
635 i = newEpoch;
636 ResetConvergenceCounter();
637 }
638 else break;
639 }
640 }
641
642 // draw progress
643 TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
644 if (fSteps > 0) {
645 Float_t progress = 0;
646 if (Float_t(i)/nEpochs < fSamplingEpoch)
647 // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
648 progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
649 else
650 {
651 // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
652 progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
653 }
654 Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
655 if (progress2>progress) progress=progress2; //zjh
656 timer.DrawProgressBar( Int_t(progress), convText );
657 }
658 else {
659 Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
660 if (progress<i) progress=i; //zjh
661 timer.DrawProgressBar( progress, convText ); //zjh
662 }
663
664 // some verbose output
665 if (fgPRINT_SEQ) {
666 PrintNetwork();
667 WaitForKeyboard();
668 }
669 }
670}
671
672////////////////////////////////////////////////////////////////////////////////
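/// compute the BFGS update vectors: on return Gamma holds the change of the
/// gradient, dE/dw(new) - dE/dw(old), and Delta holds the last step taken in
/// weight space (copied from 'buffer', as filled by LineSearch)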
673
674void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
675{
676 Int_t nWeights = fSynapses->GetEntriesFast();
677
678 Int_t IDX = 0;
679 Int_t nSynapses = fSynapses->GetEntriesFast();
680 for (Int_t i=0;i<nSynapses;i++) {
681 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
682 Gamma[IDX++][0] = -synapse->GetDEDw();
683 }
684
685 for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
686
687 ComputeDEDw();
688
689 IDX = 0;
690 for (Int_t i=0;i<nSynapses;i++)
691 {
692 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
693 Gamma[IDX++][0] += synapse->GetDEDw();
694 }
695}
696
697////////////////////////////////////////////////////////////////////////////////
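/// compute the derivative of the estimator with respect to each synapse weight,
/// accumulated over all (non-skipped) training events; if the regulator is used,
/// the derivative of the prior term (fPriorDev) is added before normalising by
/// the number of contributing events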
698
699void TMVA::MethodMLP::ComputeDEDw()
700{
701 Int_t nSynapses = fSynapses->GetEntriesFast();
702 for (Int_t i=0;i<nSynapses;i++) {
703 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
704 synapse->SetDEDw( 0.0 );
705 }
706
707 Int_t nEvents = GetNEvents();
708 Int_t nPosEvents = nEvents;
709 for (Int_t i=0;i<nEvents;i++) {
710
711 const Event* ev = GetEvent(i);
712 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
713 && (Data()->GetCurrentType() == Types::kTraining)){
714 --nPosEvents;
715 continue;
716 }
717
718 SimulateEvent( ev );
719
720 for (Int_t j=0;j<nSynapses;j++) {
721 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
722 synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
723 }
724 }
725
726 for (Int_t i=0;i<nSynapses;i++) {
727 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
728 Double_t DEDw=synapse->GetDEDw(); //zjh
729 if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
730 synapse->SetDEDw( DEDw / nPosEvents ); //zjh
731 }
732}
733
734////////////////////////////////////////////////////////////////////////////////
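/// forward-propagate a single event and set the error on each output neuron:
/// for MSE the error is w*(y - t); for two-class CE it is -w/(y - 1 + t), which
/// for t in {0,1} equals w times the derivative of -[t*ln(y)+(1-t)*ln(1-y)];
/// the per-synapse derivatives are then accumulated via CalculateDelta()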
735
736void TMVA::MethodMLP::SimulateEvent( const Event* ev )
737{
738 Double_t eventWeight = ev->GetWeight();
739
740 ForceNetworkInputs( ev );
741 ForceNetworkCalculations();
742
743 if (DoRegression()) {
744 UInt_t ntgt = DataInfo().GetNTargets();
745 for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
746 Double_t desired = ev->GetTarget(itgt);
747 Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
748 GetOutputNeuron( itgt )->SetError(error);
749 }
750 } else if (DoMulticlass()) {
751 UInt_t nClasses = DataInfo().GetNClasses();
752 UInt_t cls = ev->GetClass();
753 for (UInt_t icls = 0; icls < nClasses; icls++) {
754 Double_t desired = ( cls==icls ? 1.0 : 0.0 );
755 Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
756 GetOutputNeuron( icls )->SetError(error);
757 }
758 } else {
759 Double_t desired = GetDesiredOutput( ev );
760 Double_t error=-1; //zjh
761 if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
762 else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
763 GetOutputNeuron()->SetError(error);
764 }
765
766 CalculateNeuronDeltas();
767 for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
768 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
769 synapse->InitDelta();
770 synapse->CalculateDelta();
771 }
772}
773
774////////////////////////////////////////////////////////////////////////////////
775
776void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
777{
778 Int_t IDX = 0;
779 Int_t nSynapses = fSynapses->GetEntriesFast();
780
781 for (Int_t i=0;i<nSynapses;i++) {
782 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
783 Dir[IDX++][0] = -synapse->GetDEDw();
784 }
785}
786
787////////////////////////////////////////////////////////////////////////////////
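/// BFGS update of the approximate Hessian kept by the minimizer:
/// with gamma = change of the gradient and delta = last weight step,
///   H += [ (1 + gamma^T H gamma / gamma^T delta) * delta delta^T
///          - delta (gamma^T H) - (H gamma) delta^T ] / (gamma^T delta);
/// returns kTRUE (forcing a steepest-descent restart) if gamma^T delta == 0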
788
789Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
790{
791 TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
792 if ((Double_t) gd[0][0] == 0.) return kTRUE;
793 TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
794 TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
795 TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
796 Double_t a = 1 / (Double_t) gd[0][0];
797 Double_t f = 1 + ((Double_t)gHg[0][0]*a);
798 TMatrixD res( TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed, Delta)) );
799 res *= f;
800 res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
801 TMatrixD(TMatrixD::kTransposed, Delta)));
802 res *= a;
803 Hessian += res;
804
805 return kFALSE;
806}
807
808////////////////////////////////////////////////////////////////////////////////
809
810void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
811{
812 Int_t IDX = 0;
813 Int_t nSynapses = fSynapses->GetEntriesFast();
814 TMatrixD DEDw(nSynapses, 1);
815
816 for (Int_t i=0;i<nSynapses;i++) {
817 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
818 DEDw[IDX++][0] = synapse->GetDEDw();
819 }
820
821 dir = Hessian * DEDw;
822 for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
823}
824
825////////////////////////////////////////////////////////////////////////////////
826
827Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
828{
829 Int_t IDX = 0;
830 Int_t nSynapses = fSynapses->GetEntriesFast();
831 Double_t Result = 0.0;
832
833 for (Int_t i=0;i<nSynapses;i++) {
834 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
835 Result += Dir[IDX++][0] * synapse->GetDEDw();
836 }
837 return Result;
838}
839
840////////////////////////////////////////////////////////////////////////////////
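/// line search along the direction Dir: starting from the last accepted step
/// size, the error is bracketed by repeatedly scaling the step by fTau, and
/// fLastAlpha is then taken from a parabolic interpolation through the three
/// bracketing points (alpha1,err1), (alpha2,err2), (alpha3,err3);
/// returns kTRUE if no decrease of the error could be found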
841
842Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
843{
844 Int_t IDX = 0;
845 Int_t nSynapses = fSynapses->GetEntriesFast();
846 Int_t nWeights = nSynapses;
847
848 std::vector<Double_t> Origin(nWeights);
849 for (Int_t i=0;i<nSynapses;i++) {
850 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
851 Origin[i] = synapse->GetWeight();
852 }
853
854 Double_t err1 = GetError();
855 Double_t errOrigin=err1;//zjh
856 Double_t alpha1 = 0.;
857 Double_t alpha2 = fLastAlpha;
858
859
860 if (alpha2 < 0.01) alpha2 = 0.01;
861 else if (alpha2 > 2.0) alpha2 = 2.0;
862 Double_t alpha_original = alpha2;
863 Double_t alpha3 = alpha2;
864
865 SetDirWeights( Origin, Dir, alpha2 );
866 Double_t err2 = GetError();
867 //Double_t err2 = err1;
868 Double_t err3 = err2;
869 Bool_t bingo = kFALSE;
870
871
872 if (err1 > err2) {
873 for (Int_t i=0;i<100;i++) {
874 alpha3 *= fTau;
875 SetDirWeights(Origin, Dir, alpha3);
876 err3 = GetError();
877 if (err3 > err2) {
878 bingo = kTRUE;
879 break;
880 }
881 alpha1 = alpha2;
882 err1 = err2;
883 alpha2 = alpha3;
884 err2 = err3;
885 }
886 if (!bingo) {
887 SetDirWeights(Origin, Dir, 0.);
888 return kTRUE;
889 }
890 }
891 else {
892 for (Int_t i=0;i<100;i++) {
893 alpha2 /= fTau;
894 if (i==50) {
895 Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
896 alpha2 = -alpha_original;
897 }
898 SetDirWeights(Origin, Dir, alpha2);
899 err2 = GetError();
900 if (err1 > err2) {
901 bingo = kTRUE;
902 break;
903 }
904 alpha3 = alpha2;
905 err3 = err2;
906 }
907 if (!bingo) {
908 SetDirWeights(Origin, Dir, 0.);
909 Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
910 fLastAlpha = 0.05;
911 return kTRUE;
912 }
913 }
914
915 if (alpha1>0 && alpha2>0 && alpha3 > 0) {
916 fLastAlpha = 0.5 * (alpha1 + alpha3 -
917 (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
918 - ( err2 - err1 ) / (alpha2 - alpha1 )));
919 }
920 else {
921 fLastAlpha = alpha2;
922 }
923
924 fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
925
926 SetDirWeights(Origin, Dir, fLastAlpha);
927
928 // leaving these lines uncommented is a heavy price to pay for only a warning message
929 // (which shouldn't appear anyway)
930 // --> about 15% of time is spent in the final GetError().
931 //
932 Double_t finalError = GetError();
933 if (finalError > err1) {
934 Log() << kWARNING << "Line search increased error! Something is wrong."
935 << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
936 << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
937 }
938
939 for (Int_t i=0;i<nSynapses;i++) {
940 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
941 buffer[IDX] = synapse->GetWeight() - Origin[IDX];
942 IDX++;
943 }
944
945 if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
946
947 return kFALSE;
948}
949
950////////////////////////////////////////////////////////////////////////////////
951
952void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
953{
954 Int_t IDX = 0;
955 Int_t nSynapses = fSynapses->GetEntriesFast();
956
957 for (Int_t i=0;i<nSynapses;i++) {
958 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
959 synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
960 IDX++;
961 }
962 if (fUseRegulator) UpdatePriors();//zjh
963}
964
965
966////////////////////////////////////////////////////////////////////////////////
967
968Double_t TMVA::MethodMLP::GetError()
969{
970 Int_t nEvents = GetNEvents();
971 UInt_t ntgts = GetNTargets();
972 Double_t Result = 0.;
973
974 for (Int_t i=0;i<nEvents;i++) {
975 const Event* ev = GetEvent(i);
976
977 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
978 && (Data()->GetCurrentType() == Types::kTraining)){
979 continue;
980 }
981 SimulateEvent( ev );
982
983 Double_t error = 0.;
984 if (DoRegression()) {
985 for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
986 error += GetMSEErr( ev, itgt );//zjh
987 }
988 } else if ( DoMulticlass() ){
989 for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
990 error += GetMSEErr( ev, icls );
991 }
992 } else {
993 if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
994 else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
995 }
996 Result += error * ev->GetWeight();
997 }
998 if (fUseRegulator) Result+=fPrior; //zjh
999 if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
1000 return Result;
1001}
1002
1003////////////////////////////////////////////////////////////////////////////////
1004
1005Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
1006{
1007 Double_t error = 0;
1008 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1009 Double_t target = 0;
1010 if (DoRegression()) target = ev->GetTarget( index );
1011 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1012 else target = GetDesiredOutput( ev );
1013
1014 error = 0.5*(output-target)*(output-target); //zjh
1015
1016 return error;
1017
1018}
1019
1020////////////////////////////////////////////////////////////////////////////////
1021
1022Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
1023{
1024 Double_t error = 0;
1025 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1026 Double_t target = 0;
1027 if (DoRegression()) target = ev->GetTarget( index );
1028 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1029 else target = GetDesiredOutput( ev );
1030
1031 error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1032
1033 return error;
1034}
1035
1036////////////////////////////////////////////////////////////////////////////////
1037/// minimize estimator / train network with back propagation algorithm
1038
1039void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
1040{
1041 // Timer timer( nEpochs, GetName() );
1042 Timer timer( (fSteps>0?100:nEpochs), GetName() );
1043 Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1044
1045 // create histograms for overtraining monitoring
1046 Int_t nbinTest = Int_t(nEpochs/fTestRate);
1047 if(!IsSilentFile())
1048 {
1049 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1050 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1051 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1052 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1053 }
1054 if(fSamplingTraining || fSamplingTesting)
1055 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1056
1057 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1058 timer.DrawProgressBar(0);
1059
1060 // estimators
1061 Double_t trainE = -1;
1062 Double_t testE = -1;
1063
1064 // start training cycles (epochs)
1065 for (Int_t i = 0; i < nEpochs; i++) {
1066
1067 if (fExitFromTraining) break;
1068 fIPyCurrentIter = i;
1069 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1070 if ((i+1)%fTestRate == 0 || (i == 0)) {
1071 if (fSamplingTraining) {
1072 Data()->SetCurrentType( Types::kTraining );
1073 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1074 Data()->CreateSampling();
1075 }
1076 if (fSamplingTesting) {
1077 Data()->SetCurrentType( Types::kTesting );
1078 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1079 Data()->CreateSampling();
1080 }
1081 }
1082 }
1083 else {
1084 Data()->SetCurrentType( Types::kTraining );
1085 Data()->InitSampling(1.0,1.0);
1086 Data()->SetCurrentType( Types::kTesting );
1087 Data()->InitSampling(1.0,1.0);
1088 }
1089 Data()->SetCurrentType( Types::kTraining );
1090
1091 TrainOneEpoch();
1092 DecaySynapseWeights(i >= lateEpoch);
1093
1094 // monitor convergence of training and control sample
1095 if ((i+1)%fTestRate == 0) {
1096 trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1097 testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1098 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1099 if(!IsSilentFile())
1100 {
1101 fEstimatorHistTrain->Fill( i+1, trainE );
1102 fEstimatorHistTest ->Fill( i+1, testE );
1103 }
1104 Bool_t success = kFALSE;
1105 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1106 success = kTRUE;
1107 }
1108 Data()->EventResult( success );
1109
1110 SetCurrentValue( testE );
1111 if (HasConverged()) {
1112 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1113 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1114 i = newEpoch;
1115 ResetConvergenceCounter();
1116 }
1117 else {
1118 if (lateEpoch > i) lateEpoch = i;
1119 else break;
1120 }
1121 }
1122 }
1123
1124 // draw progress bar (add convergence value)
1125 TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1126 if (fSteps > 0) {
1127 Float_t progress = 0;
1128 if (Float_t(i)/nEpochs < fSamplingEpoch)
1129 progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1130 else
1131 progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1132
1133 timer.DrawProgressBar( Int_t(progress), convText );
1134 }
1135 else {
1136 timer.DrawProgressBar( i, convText );
1137 }
1138 }
1139}
1140
1141////////////////////////////////////////////////////////////////////////////////
1142/// train network over a single epoch/cycle of events
1143
1144void TMVA::MethodMLP::TrainOneEpoch()
1145{
1146 Int_t nEvents = Data()->GetNEvents();
1147
1148 // randomize the order events will be presented, important for sequential mode
1149 Int_t* index = new Int_t[nEvents];
1150 for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1151 Shuffle(index, nEvents);
1152
1153 // loop over all training events
1154 for (Int_t i = 0; i < nEvents; i++) {
1155
1156 const Event * ev = GetEvent(index[i]);
1157 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1158 && (Data()->GetCurrentType() == Types::kTraining)){
1159 continue;
1160 }
1161
1162 TrainOneEvent(index[i]);
1163
1164 // do adjustments if in batch mode
1165 if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1166 AdjustSynapseWeights();
1167 if (fgPRINT_BATCH) {
1168 PrintNetwork();
1169 WaitForKeyboard();
1170 }
1171 }
1172
1173 // debug in sequential mode
1174 if (fgPRINT_SEQ) {
1175 PrintNetwork();
1176 WaitForKeyboard();
1177 }
1178 }
1179
1180 delete[] index;
1181}
1182
1183////////////////////////////////////////////////////////////////////////////////
1184/// Input:
1185/// - index: the array to shuffle
1186/// - n: the size of the array
1187/// Output:
1188/// - index: the shuffled indexes
1189///
1190/// This method is used for sequential training
1191
1192void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
1193{
1194 Int_t j, k;
1195 Int_t a = n - 1;
1196 for (Int_t i = 0; i < n; i++) {
1197 j = (Int_t) (frgen->Rndm() * a);
1198 if (j<n){ // address the 'worries' of coverity
1199 k = index[j];
1200 index[j] = index[i];
1201 index[i] = k;
1202 }
1203 }
1204}
1205
1206////////////////////////////////////////////////////////////////////////////////
1207/// decay synapse weights
1208/// in last 10 epochs, lower learning rate even more to find a good minimum
1209
1210void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
1211{
1212 TSynapse* synapse;
1213 Int_t numSynapses = fSynapses->GetEntriesFast();
1214 for (Int_t i = 0; i < numSynapses; i++) {
1215 synapse = (TSynapse*)fSynapses->At(i);
1216 if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1217 else synapse->DecayLearningRate(fDecayRate);
1218 }
1219}
1220
1221////////////////////////////////////////////////////////////////////////////////
1222/// fast per-event training
1223
1224void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
1225{
1226 GetEvent(ievt);
1227
1228 // as soon as we know how to get event weights, get that here
1229
1230 // note: the normalization of event weights will affect the choice
1231 // of learning rate, one will have to experiment to get the right value.
1232 // in general, if the "average" event weight is 1, the learning rate
1233 // should be good if set around 0.02 (a good value if all event weights are 1)
1234 Double_t eventWeight = 1.0;
1235
1236 // get the desired output of this event
1237 Double_t desired;
1238 if (type == 0) desired = fOutput->GetMin(); // background //zjh
1239 else desired = fOutput->GetMax(); // signal //zjh
1240
1241 // force the value for each input neuron
1242 Double_t x;
1243 TNeuron* neuron;
1244
1245 for (UInt_t j = 0; j < GetNvar(); j++) {
1246 x = branchVar[j];
1247 if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1248 neuron = GetInputNeuron(j);
1249 neuron->ForceValue(x);
1250 }
1251
1252 ForceNetworkCalculations();
1253 UpdateNetwork(desired, eventWeight);
1254}
1255
1256////////////////////////////////////////////////////////////////////////////////
1257/// train network over a single event
1258/// this uses the new event model
1259
1260void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
1261{
1262 // note: the normalization of event weights will affect the choice
1263 // of learning rate, one will have to experiment to get the right value.
1264 // in general, if the "average" event weight is 1, the learning rate
1265 // should be good if set around 0.02 (a good value if all event weights are 1)
1266
1267 const Event * ev = GetEvent(ievt);
1268 Double_t eventWeight = ev->GetWeight();
1269 ForceNetworkInputs( ev );
1270 ForceNetworkCalculations();
1271 if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1272 else if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1273 else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1274}
1275
1276////////////////////////////////////////////////////////////////////////////////
1277/// get the desired output of this event
1278
1279Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
1280{
1281 return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1282}
1283
1284////////////////////////////////////////////////////////////////////////////////
1285/// update the network based on how closely
1286/// the output matched the desired output
1287
1288void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
1289{
1290 Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1291 if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1292 else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1293 else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1294 error *= eventWeight;
1295 GetOutputNeuron()->SetError(error);
1296 CalculateNeuronDeltas();
1297 UpdateSynapses();
1298}
1299
1300////////////////////////////////////////////////////////////////////////////////
1301/// update the network based on how closely
1302/// the output matched the desired output
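/// (multiclass case: the output activations are passed through a softmax, and
/// the error set on output neuron i is softmax(act)_i - desired_i, i.e. the
/// cross-entropy gradient with respect to the activations)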
1303
1304void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1305{
1306 // Norm for softmax
1307 Double_t norm = 0.;
1308 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1309 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1310 norm += TMath::Exp(act);
1311 }
1312
1313 // Get output of network, and apply softmax
1314 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1315 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1316 Double_t output = TMath::Exp(act) / norm;
1317 Double_t error = output - desired.at(i);
1318 error *= eventWeight;
1319 GetOutputNeuron(i)->SetError(error);
1320 }
1321
1322 // Do backpropagation
1323 CalculateNeuronDeltas();
1324 UpdateSynapses();
1325}
1326
1327////////////////////////////////////////////////////////////////////////////////
1328/// have each neuron calculate its delta by back propagation
1329
1330void TMVA::MethodMLP::CalculateNeuronDeltas()
1331{
1332 TNeuron* neuron;
1333 Int_t numNeurons;
1334 Int_t numLayers = fNetwork->GetEntriesFast();
1335 TObjArray* curLayer;
1336
1337 // step backwards through the network (back propagation)
1338 // deltas calculated starting at output layer
1339 for (Int_t i = numLayers-1; i >= 0; i--) {
1340 curLayer = (TObjArray*)fNetwork->At(i);
1341 numNeurons = curLayer->GetEntriesFast();
1342
1343 for (Int_t j = 0; j < numNeurons; j++) {
1344 neuron = (TNeuron*) curLayer->At(j);
1345 neuron->CalculateDelta();
1346 }
1347 }
1348}
1349
1350////////////////////////////////////////////////////////////////////////////////
1351/// create genetics class similar to GeneticCut
1352/// give it vector of parameter ranges (parameters = weights)
1353/// link fitness function of this class to ComputeEstimator
1354/// instantiate GA (see MethodCuts)
1355/// run it
1356/// then this should exist for GA, Minuit and random sampling
1357
1358void TMVA::MethodMLP::GeneticMinimize()
1359{
1360 PrintMessage("Minimizing Estimator with GA");
1361
1362 // define GA parameters
1363 fGA_preCalc = 1;
1364 fGA_SC_steps = 10;
1365 fGA_SC_rate = 5;
1366 fGA_SC_factor = 0.95;
1367 fGA_nsteps = 30;
1368
1369 // ranges
1370 std::vector<Interval*> ranges;
1371
1372 Int_t numWeights = fSynapses->GetEntriesFast();
1373 for (Int_t ivar=0; ivar< numWeights; ivar++) {
1374 ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1375 }
1376
1377 FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1378 gf->Run();
1379
1380 Double_t estimator = CalculateEstimator();
1381 Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1382}
1383
1384////////////////////////////////////////////////////////////////////////////////
1385/// interface to the estimate
1386
1387Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1388{
1389 return ComputeEstimator( parameters );
1390}
1391
1392////////////////////////////////////////////////////////////////////////////////
1393/// this function is called by GeneticANN for GA optimization
1394
1395Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1396{
1397 TSynapse* synapse;
1398 Int_t numSynapses = fSynapses->GetEntriesFast();
1399
1400 for (Int_t i = 0; i < numSynapses; i++) {
1401 synapse = (TSynapse*)fSynapses->At(i);
1402 synapse->SetWeight(parameters.at(i));
1403 }
1404 if (fUseRegulator) UpdatePriors(); //zjh
1405
1406 Double_t estimator = CalculateEstimator();
1407
1408 return estimator;
1409}
1410
1411////////////////////////////////////////////////////////////////////////////////
1412/// update synapse error fields and adjust the weights (if in sequential mode)
1413
1414void TMVA::MethodMLP::UpdateSynapses()
1415{
1416 TNeuron* neuron;
1417 Int_t numNeurons;
1418 TObjArray* curLayer;
1419 Int_t numLayers = fNetwork->GetEntriesFast();
1420
1421 for (Int_t i = 0; i < numLayers; i++) {
1422 curLayer = (TObjArray*)fNetwork->At(i);
1423 numNeurons = curLayer->GetEntriesFast();
1424
1425 for (Int_t j = 0; j < numNeurons; j++) {
1426 neuron = (TNeuron*) curLayer->At(j);
1427 if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1428 else neuron->UpdateSynapsesSequential();
1429 }
1430 }
1431}
1432
1433////////////////////////////////////////////////////////////////////////////////
1434/// just adjust the synapse weights (should be called in batch mode)
1435
1436void TMVA::MethodMLP::AdjustSynapseWeights()
1437{
1438 TNeuron* neuron;
1439 Int_t numNeurons;
1440 TObjArray* curLayer;
1441 Int_t numLayers = fNetwork->GetEntriesFast();
1442
1443 for (Int_t i = numLayers-1; i >= 0; i--) {
1444 curLayer = (TObjArray*)fNetwork->At(i);
1445 numNeurons = curLayer->GetEntriesFast();
1446
1447 for (Int_t j = 0; j < numNeurons; j++) {
1448 neuron = (TNeuron*) curLayer->At(j);
1449 neuron->AdjustSynapseWeights();
1450 }
1451 }
1452}
1453
1454////////////////////////////////////////////////////////////////////////////////
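/// recompute the regularisation ("weight decay") term and its derivative:
/// fPrior = 1/2 * sum_i alpha_i * w_i^2 and fPriorDev_i = alpha_i * w_i,
/// where alpha_i is the regulator assigned to synapse i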
1455
1456void TMVA::MethodMLP::UpdatePriors()
1457{
1458 fPrior=0;
1459 fPriorDev.clear();
1460 Int_t nSynapses = fSynapses->GetEntriesFast();
1461 for (Int_t i=0;i<nSynapses;i++) {
1462 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1463 fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1464 fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1465 }
1466}
1467
1468////////////////////////////////////////////////////////////////////////////////
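/// re-estimate the regulator strengths from the current weights and the
/// approximate inverse Hessian: each regulator class is updated as
///   alpha = variance * n_w / ( sum of squared weights + variance * trace of InvH ),
/// where 'variance' is estimated from the training error (MSE case) and
/// gamma = sum_i (1 - alpha_i * InvH_ii) counts the effectively well-determined
/// weights (an evidence-approximation style update)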
1469
1470void TMVA::MethodMLP::UpdateRegulators()
1471{
1472 TMatrixD InvH(0,0);
1473 GetApproxInvHessian(InvH);
1474 Int_t numSynapses=fSynapses->GetEntriesFast();
1475 Int_t numRegulators=fRegulators.size();
1476 Float_t gamma=0,
1477 variance=1.; // Gaussian noise
1478 std::vector<Int_t> nWDP(numRegulators);
1479 std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1480 for (int i=0;i<numSynapses;i++) {
1481 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1482 Int_t idx=fRegulatorIdx[i];
1483 nWDP[idx]++;
1484 trace[idx]+=InvH[i][i];
1485 gamma+=1-fRegulators[idx]*InvH[i][i];
1486 weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1487 }
1488 if (fEstimator==kMSE) {
1489 if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1490 else variance=CalculateEstimator( Types::kTraining, 0 );
1491 }
1492
1493 //Log() << kDEBUG << Endl;
1494 for (int i=0;i<numRegulators;i++)
1495 {
1496 //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1497 fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1498 if (fRegulators[i]<0) fRegulators[i]=0;
1499 Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1500 }
1501 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1502 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1503
1504 Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1505
1506}
1507
1508////////////////////////////////////////////////////////////////////////////////
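/// build an outer-product (Gauss-Newton style) approximation of the Hessian
/// from the per-event output sensitivities with respect to the weights, add the
/// regulators (or a small 1e-6 term) to the diagonal to keep it positive
/// definite, and invert it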
1509
1510void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1511{
1512 Int_t numSynapses=fSynapses->GetEntriesFast();
1513 InvHessian.ResizeTo( numSynapses, numSynapses );
1514 InvHessian=0;
1515 TMatrixD sens(numSynapses,1);
1516 TMatrixD sensT(1,numSynapses);
1517 Int_t nEvents = GetNEvents();
1518 for (Int_t i=0;i<nEvents;i++) {
1519 GetEvent(i);
1520 double outputValue=GetMvaValue(); // force calculation
1521 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1522 CalculateNeuronDeltas();
1523 for (Int_t j = 0; j < numSynapses; j++){
1524 TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1525 synapses->InitDelta();
1526 synapses->CalculateDelta();
1527 sens[j][0]=sensT[0][j]=synapses->GetDelta();
1528 }
1529 if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1530 else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1531 }
1532
1533 // TVectorD eValue(numSynapses);
1534 if (regulate) {
1535 for (Int_t i = 0; i < numSynapses; i++){
1536 InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1537 }
1538 }
1539 else {
1540 for (Int_t i = 0; i < numSynapses; i++){
1541 InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1542 }
1543 }
1544
1545 InvHessian.Invert();
1546
1547}
1548
1549////////////////////////////////////////////////////////////////////////////////
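/// return the MVA value and, if requested, a linearised error estimate:
/// the variance of the output-neuron value is approximated as
///   sigma^2 = s^T * fInvHessian * s,  with s_i = d(value)/d(weight_i),
/// and the asymmetric errors are obtained by propagating +-sigma through the
/// output activation function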
1550
1551Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
1552{
1553 Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1554
1555 // no hessian (old training file) or no error requested
1556 if (!fCalculateErrors || errLower==0 || errUpper==0)
1557 return MvaValue;
1558
1559 Double_t MvaUpper,MvaLower,median,variance;
1560 Int_t numSynapses=fSynapses->GetEntriesFast();
1561 if (fInvHessian.GetNcols()!=numSynapses) {
1562 Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1563 }
1564 TMatrixD sens(numSynapses,1);
1565 TMatrixD sensT(1,numSynapses);
1566 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1567 //GetOutputNeuron()->SetError(1.);
1568 CalculateNeuronDeltas();
1569 for (Int_t i = 0; i < numSynapses; i++){
1570 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1571 synapses->InitDelta();
1572 synapses->CalculateDelta();
1573 sensT[0][i]=synapses->GetDelta();
1574 }
1575 sens.Transpose(sensT);
1576 TMatrixD sig=sensT*fInvHessian*sens;
1577 variance=sig[0][0];
1578 median=GetOutputNeuron()->GetValue();
1579
1580 if (variance<0) {
1581 Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1582 variance=0;
1583 }
1584 variance=sqrt(variance);
1585
1586 //upper
1587 MvaUpper=fOutput->Eval(median+variance);
1588 if(errUpper)
1589 *errUpper=MvaUpper-MvaValue;
1590
1591 //lower
1592 MvaLower=fOutput->Eval(median-variance);
1593 if(errLower)
1594 *errLower=MvaValue-MvaLower;
1595
1596 return MvaValue;
1597}
1598
1599
1600#ifdef MethodMLP_UseMinuit__
1601
1602////////////////////////////////////////////////////////////////////////////////
1603/// minimize using Minuit
1604
1605void TMVA::MethodMLP::MinuitMinimize()
1606{
1607 fNumberOfWeights = fSynapses->GetEntriesFast();
1608
1609 TFitter* tfitter = new TFitter( fNumberOfWeights );
1610
1611 // minuit-specific settings
1612 Double_t args[10];
1613
1614 // output level
1615 args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1616 tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1617 tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1618
1619 double w[54];
1620
1621 // init parameters
1622 for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1623 TString parName = Form("w%i", ipar);
1624 tfitter->SetParameter( ipar,
1625 parName, w[ipar], 0.1, 0, 0 );
1626 }
1627
1628 // define the CFN function
1629 tfitter->SetFCN( &IFCN );
1630
1631 // define fit strategy
1632 args[0] = 2;
1633 tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1634
1635 // now do the fit !
1636 args[0] = 1e-04;
1637 tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1638
1639 Bool_t doBetter = kFALSE;
1640 Bool_t doEvenBetter = kFALSE;
1641 if (doBetter) {
1642 args[0] = 1e-04;
1643 tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1644
1645 if (doEvenBetter) {
1646 args[0] = 500;
1647 tfitter->ExecuteCommand( "MINOS", args, 1 );
1648 }
1649 }
1650}
1651
1652////////////////////////////////////////////////////////////////////////////////
1653/// Evaluate the minimisation function
1654///
1655/// Input parameters:
1656/// - npars: number of currently variable parameters
1657/// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1658/// - fitPars: array of (constant and variable) parameters
1659/// - iflag: indicates what is to be calculated (see example below)
1660/// - grad: array of gradients
1661///
1662/// Output parameters:
1663/// - f: the calculated function value.
1664/// - grad: the (optional) vector of first derivatives).
1665
1666void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1667{
1668 ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1669}
1670
1671TTHREAD_TLS(Int_t) nc = 0;
1672TTHREAD_TLS(double) minf = 1000000;
1673
1674void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1675{
1676 // first update the weights
1677 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1678 TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1679 synapse->SetWeight(fitPars[ipar]);
1680 }
1681
1682 // now compute the estimator
1683 f = CalculateEstimator();
1684
1685 nc++;
1686 if (f < minf) minf = f;
1687 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1688 Log() << kDEBUG << Endl;
1689 Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1690}
1691
1692////////////////////////////////////////////////////////////////////////////////
1693/// global "this" pointer to be used in minuit
1694
1695TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1696{
1697 return fgThis;
1698}
1699
1700#endif
1701
1702
1703////////////////////////////////////////////////////////////////////////////////
1704/// write specific classifier response
1705
1706void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1707{
1708 MethodANNBase::MakeClassSpecific(fout, className);
1709}
1710
1711////////////////////////////////////////////////////////////////////////////////
1712/// get help message text
1713///
1714/// typical length of text line:
1715/// "|--------------------------------------------------------------|"
1716
1717void TMVA::MethodMLP::GetHelpMessage() const
1718{
1719 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1720 TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1721
1722 Log() << Endl;
1723 Log() << col << "--- Short description:" << colres << Endl;
1724 Log() << Endl;
1725 Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1726 Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1727 Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1728 Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1729 Log() << "signal and one background). " << Endl;
1730 Log() << Endl;
1731 Log() << col << "--- Performance optimisation:" << colres << Endl;
1732 Log() << Endl;
1733   Log() << "Neural networks are stable and perform well for a large variety of " << Endl;
1734 Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1735 Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1736   Log() << "number of input variables with little discrimination power. " << Endl;
1737 Log() << "" << Endl;
1738 Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1739 Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1740 Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1741 Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1742 Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1743 Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1744 Log() << "competitive results)." << Endl;
1745 Log() << Endl;
1746 Log() << col << "Overtraining: " << colres
1747 << "only the TMlpANN performs an explicit separation of the" << Endl;
1748 Log() << "full training sample into independent training and validation samples." << Endl;
1749 Log() << "We have found that in most high-energy physics applications the " << Endl;
1750 Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1751 Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1752 Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1753 Log() << "the use of validation samples would only reduce the available training" << Endl;
1754 Log() << "information. However, if the performance on the training sample is " << Endl;
1755   Log() << "found to be significantly better than the one found with the " << Endl;
1756   Log() << "independent test sample, caution is needed. The results for these samples " << Endl;
1757 Log() << "are printed to standard output at the end of each training job." << Endl;
1758 Log() << Endl;
1759 Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1760 Log() << Endl;
1761 Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1762   Log() << "\"HiddenLayers=N+1,N,...\", where the first hidden layer has N+1" << Endl;
1763 Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1764 Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1765 Log() << "in favour of more neurons in the first hidden layer." << Endl;
1766 Log() << "" << Endl;
1767   Log() << "The number of cycles should be above 500. As noted above, if the number of" << Endl;
1768 Log() << "adjustable weights is small compared to the training sample size," << Endl;
1769   Log() << "using a large number of training cycles should not lead to overtraining." << Endl;
1770}
1771
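As a usage illustration of the options discussed in the help message above (HiddenLayers, NCycles, TrainingMethod), here is a hedged sketch of booking the MLP through the TMVA Factory. File, tree and variable names are placeholders and would need to be adapted to a real dataset:

#include "TFile.h"
#include "TTree.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

void BookMLPExample()
{
   TFile* input  = TFile::Open( "input.root" );               // placeholder input file
   TFile* output = TFile::Open( "TMVA_MLP.root", "RECREATE" );

   TMVA::Factory    factory( "TMVAClassification", output, "!V:AnalysisType=Classification" );
   TMVA::DataLoader loader( "dataset" );

   loader.AddVariable( "var1", 'F' );                          // placeholder variable names
   loader.AddVariable( "var2", 'F' );
   loader.AddSignalTree    ( (TTree*)input->Get( "TreeS" ), 1.0 );   // placeholder tree names
   loader.AddBackgroundTree( (TTree*)input->Get( "TreeB" ), 1.0 );
   loader.PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents" );

   // first hidden layer with N+1 neurons, second with N (N = number of input variables);
   // NCycles above 500, as recommended in the help text
   factory.BookMethod( &loader, TMVA::Types::kMLP, "MLP",
                       "H:!V:NeuronType=tanh:NCycles=600:HiddenLayers=N+1,N:TrainingMethod=BFGS" );

   factory.TrainAllMethods();
   factory.TestAllMethods();
   factory.EvaluateAllMethods();

   output->Close();
}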