Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodMLP.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodMLP *
8 * *
9 * *
10 * Description: *
11 * ANN Multilayer Perceptron class for the discrimination of signal *
12 * from background. BFGS implementation based on TMultiLayerPerceptron *
13 * class from ROOT (http://root.cern.ch). *
14 * *
15 * Authors (alphabetical): *
16 * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19 * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20 * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21 * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22 * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25 * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26 * *
27 * Copyright (c) 2005-2011: *
28 * CERN, Switzerland *
29 * U. of Victoria, Canada *
30 * MPI-K Heidelberg, Germany *
31 * U. of Bonn, Germany *
32 * *
33 * Redistribution and use in source and binary forms, with or without *
34 * modification, are permitted according to the terms listed in LICENSE *
35 * (see tmva/doc/LICENSE) *
36 **********************************************************************************/
37
38/*! \class TMVA::MethodMLP
39\ingroup TMVA
40
41Multilayer Perceptron class built off of MethodANNBase
42
43*/
44
45#include "TMVA/MethodMLP.h"
46
47#include "TMVA/Config.h"
48#include "TMVA/Configurable.h"
51#include "TMVA/DataSet.h"
52#include "TMVA/DataSetInfo.h"
53#include "TMVA/FitterBase.h"
54#include "TMVA/GeneticFitter.h"
55#include "TMVA/IFitterTarget.h"
56#include "TMVA/IMethod.h"
57#include "TMVA/Interval.h"
58#include "TMVA/MethodANNBase.h"
59#include "TMVA/MsgLogger.h"
60#include "TMVA/TNeuron.h"
61#include "TMVA/TSynapse.h"
62#include "TMVA/Timer.h"
63#include "TMVA/Tools.h"
64#include "TMVA/Types.h"
65
66#include "TH1.h"
67#include "TString.h"
68#include "TFitter.h"
69#include "TMatrixD.h"
70#include "TMath.h"
71
72#include <iostream>
73#include <cmath>
74#include <vector>
75
76#ifdef MethodMLP_UseMinuit__
77TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
79#endif
80
82
83
84 using std::vector;
85
86////////////////////////////////////////////////////////////////////////////////
87/// standard constructor
88
90 const TString& methodTitle,
92 const TString& theOption)
93 : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
94 fUseRegulator(false), fCalculateErrors(false),
95 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
96 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
97 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
98 fSamplingTraining(false), fSamplingTesting(false),
99 fLastAlpha(0.0), fTau(0.),
100 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
101 fBPMode(kSequential), fBpModeS("None"),
102 fBatchSize(0), fTestRate(0), fEpochMon(false),
103 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
104 fGA_SC_rate(0), fGA_SC_factor(0.0),
105 fDeviationsFromTargets(0),
106 fWeightRange (1.0)
107{
108
109}
110
111////////////////////////////////////////////////////////////////////////////////
112/// constructor from a weight file
113
115 const TString& theWeightFile)
117 fUseRegulator(false), fCalculateErrors(false),
118 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
119 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
120 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
121 fSamplingTraining(false), fSamplingTesting(false),
122 fLastAlpha(0.0), fTau(0.),
123 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
124 fBPMode(kSequential), fBpModeS("None"),
125 fBatchSize(0), fTestRate(0), fEpochMon(false),
126 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
127 fGA_SC_rate(0), fGA_SC_factor(0.0),
128 fDeviationsFromTargets(0),
129 fWeightRange (1.0)
130{
131}
132
133////////////////////////////////////////////////////////////////////////////////
134/// destructor
135/// nothing to be done
136
140
142{
143 Train(NumCycles());
144}
145
146
147
148////////////////////////////////////////////////////////////////////////////////
149/// MLP can handle classification with 2 classes and regression with one regression-target
150
152{
153 if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
154 if (type == Types::kMulticlass ) return kTRUE;
155 if (type == Types::kRegression ) return kTRUE;
156
157 return kFALSE;
158}
159
160////////////////////////////////////////////////////////////////////////////////
161/// default initializations
162
164{
165 // the minimum requirement to declare an event signal-like
166 SetSignalReferenceCut( 0.5 );
167#ifdef MethodMLP_UseMinuit__
168 fgThis = this;
169#endif
170}
171
172////////////////////////////////////////////////////////////////////////////////
173/// define the options (their key words) that can be set in the option string
174///
175/// know options:
176///
177/// - TrainingMethod `<string>` Training method
178/// available values are:
179/// - BP Back-Propagation `<default>`
180/// - GA Genetic Algorithm (takes a LONG time)
181///
182/// - LearningRate `<float>` NN learning rate parameter
183/// - DecayRate `<float>` Decay rate for learning parameter
184/// - TestRate `<int>` Test for overtraining performed at each #%th epochs
185///
186/// - BPMode `<string>` Back-propagation learning mode
187/// available values are:
188/// - sequential `<default>`
189/// - batch
190///
191/// - BatchSize `<int>` Batch size: number of events/batch, only set if in Batch Mode,
192/// - -1 for BatchSize=number_of_events
193
195{
196 DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
197 "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
198 AddPreDefVal(TString("BP"));
199 AddPreDefVal(TString("GA"));
200 AddPreDefVal(TString("BFGS"));
201
202 DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
203 DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
204 DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
205 DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
206
207 DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
208 DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
209 DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance"," The sampling weights of events in epochs which successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
210
211 DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
212 DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
213
214 DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
215 DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
216
217 DeclareOptionRef(fBpModeS="sequential", "BPMode",
218 "Back-propagation learning mode: sequential or batch");
219 AddPreDefVal(TString("sequential"));
220 AddPreDefVal(TString("batch"));
221
222 DeclareOptionRef(fBatchSize=-1, "BatchSize",
223 "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
224
225 DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
226 "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
227
228 DeclareOptionRef(fSteps=-1, "ConvergenceTests",
229 "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
230
231 DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
232 "Use regulator to avoid over-training"); //zjh
233 DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
234 "Maximum times of regulator update"); //zjh
235 DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
236 "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
237
238 DeclareOptionRef(fWeightRange=1.0, "WeightRange",
239 "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
240
241}
242
243////////////////////////////////////////////////////////////////////////////////
244/// process user options
245
247{
249
250
251 if (IgnoreEventsWithNegWeightsInTraining()) {
252 Log() << kINFO
253 << "Will ignore negative events in training!"
254 << Endl;
255 }
256
257
258 if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
259 else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
260 else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
261
262 if (fBpModeS == "sequential") fBPMode = kSequential;
263 else if (fBpModeS == "batch") fBPMode = kBatch;
264
265 // InitializeLearningRates();
266
267 if (fBPMode == kBatch) {
268 Data()->SetCurrentType(Types::kTraining);
269 Int_t numEvents = Data()->GetNEvents();
271 }
272}
273
274////////////////////////////////////////////////////////////////////////////////
275/// initialize learning rates of synapses, used only by back propagation
276
278{
279 Log() << kDEBUG << "Initialize learning rates" << Endl;
281 Int_t numSynapses = fSynapses->GetEntriesFast();
282 for (Int_t i = 0; i < numSynapses; i++) {
283 synapse = (TSynapse*)fSynapses->At(i);
284 synapse->SetLearningRate(fLearnRate);
285 }
286}
287
288////////////////////////////////////////////////////////////////////////////////
289/// calculate the estimator that training is attempting to minimize
290
292{
293 // sanity check
295 Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
296 }
297
298 Types::ETreeType saveType = Data()->GetCurrentType();
299 Data()->SetCurrentType(treeType);
300
301 // if epochs are counted create monitoring histograms (only available for classification)
302 TString type = (treeType == Types::kTraining ? "train" : "test");
303 TString name = TString::Format("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
304 TString nameB = name + "_B";
305 TString nameS = name + "_S";
306 Int_t nbin = 100;
307 Float_t limit = 2;
308 TH1* histS = 0;
309 TH1* histB = 0;
310 if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
311 histS = new TH1F( nameS, nameS, nbin, -limit, limit );
312 histB = new TH1F( nameB, nameB, nbin, -limit, limit );
313 }
314
316
317 // loop over all training events
318 Int_t nEvents = GetNEvents();
319 UInt_t nClasses = DataInfo().GetNClasses();
320 UInt_t nTgts = DataInfo().GetNTargets();
321
322
323 Float_t sumOfWeights = 0.f;
324 if( fWeightRange < 1.f ){
325 fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
326 }
327
328 for (Int_t i = 0; i < nEvents; i++) {
329
330 const Event* ev = GetEvent(i);
331
332 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
333 && (saveType == Types::kTraining)){
334 continue;
335 }
336
337 Double_t w = ev->GetWeight();
338
339 ForceNetworkInputs( ev );
340 ForceNetworkCalculations();
341
342 Double_t d = 0, v = 0;
343 if (DoRegression()) {
344 for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
345 v = GetOutputNeuron( itgt )->GetActivationValue();
346 Double_t targetValue = ev->GetTarget( itgt );
348 d += (dt*dt);
349 }
350 estimator += d*w;
351 } else if (DoMulticlass() ) {
352 UInt_t cls = ev->GetClass();
353 if (fEstimator==kCE){
354 Double_t norm(0);
355 for (UInt_t icls = 0; icls < nClasses; icls++) {
356 Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
357 norm += exp( activationValue );
358 if(icls==cls)
359 d = exp( activationValue );
360 }
361 d = -TMath::Log(d/norm);
362 }
363 else{
364 for (UInt_t icls = 0; icls < nClasses; icls++) {
365 Double_t desired = (icls==cls) ? 1.0 : 0.0;
366 v = GetOutputNeuron( icls )->GetActivationValue();
367 d = (desired-v)*(desired-v);
368 }
369 }
370 estimator += d*w; //zjh
371 } else {
372 Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
373 v = GetOutputNeuron()->GetActivationValue();
374 if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
375 else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
376 estimator += d*w; //zjh
377 }
378
379 if( fDeviationsFromTargets )
380 fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
381
382 sumOfWeights += w;
383
384
385 // fill monitoring histograms
386 if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
387 else if (histB != 0) histB->Fill( float(v), float(w) );
388 }
389
390
391 if( fDeviationsFromTargets ) {
392 std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
393
395 estimator = 0.f;
396
397 Float_t weightRangeCut = fWeightRange*sumOfWeights;
398 Float_t weightSum = 0.f;
399 for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
400 float deviation = (*itDev).first;
401 float devWeight = (*itDev).second;
402 weightSum += devWeight; // add the weight of this event
403 if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
405 }
406 }
407
409 delete fDeviationsFromTargets;
410 }
411
412 if (histS != 0) fEpochMonHistS.push_back( histS );
413 if (histB != 0) fEpochMonHistB.push_back( histB );
414
415 //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
416 //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
417 //else estimator = estimator*0.5/Float_t(nEvents);
419
420
421 //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
422
423 Data()->SetCurrentType( saveType );
424
425 // provide epoch-wise monitoring
426 if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
427 CreateWeightMonitoringHists( TString::Format("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
428 }
429
430 return estimator;
431}
432
433////////////////////////////////////////////////////////////////////////////////
434
436{
437 if (fNetwork == 0) {
438 //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
439 Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
440 SetAnalysisType(GetAnalysisType());
441 }
442 Log() << kDEBUG << "reinitialize learning rates" << Endl;
443 InitializeLearningRates();
444 Log() << kHEADER;
445 PrintMessage("Training Network");
446 Log() << Endl;
447 Int_t nEvents=GetNEvents();
448 Int_t nSynapses=fSynapses->GetEntriesFast();
449 if (nSynapses>nEvents)
450 Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
451
452 fIPyMaxIter = nEpochs;
453 if (fInteractive && fInteractive->NotInitialized()){
454 std::vector<TString> titles = {"Error on training set", "Error on test set"};
455 fInteractive->Init(titles);
456 }
457
458#ifdef MethodMLP_UseMinuit__
460#else
461 if (fTrainingMethod == kGA) GeneticMinimize();
462 else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
463 else BackPropagationMinimize(nEpochs);
464#endif
465
466 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
467 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
468 if (fUseRegulator){
469 Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
470 UpdateRegulators();
471 Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
472 }
473
474 if( fCalculateErrors || fUseRegulator )
475 {
476 Int_t numSynapses=fSynapses->GetEntriesFast();
477 fInvHessian.ResizeTo(numSynapses,numSynapses);
478 GetApproxInvHessian( fInvHessian ,false);
479 }
480 ExitFromTraining();
481}
482
483////////////////////////////////////////////////////////////////////////////////
484/// train network with BFGS algorithm
485
487{
488 Timer timer( (fSteps>0?100:nEpochs), GetName() );
489
490 // create histograms for overtraining monitoring
491 Int_t nbinTest = Int_t(nEpochs/fTestRate);
492 if(!IsSilentFile())
493 {
494 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
495 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
496 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
497 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
498 }
499
500 Int_t nSynapses = fSynapses->GetEntriesFast();
502
503 for (Int_t i=0;i<nSynapses;i++) {
504 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
505 synapse->SetDEDw(0.0);
506 }
507
508 std::vector<Double_t> buffer( nWeights );
509 for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
510
511 TMatrixD Dir ( nWeights, 1 );
512 TMatrixD Hessian ( nWeights, nWeights );
513 TMatrixD Gamma ( nWeights, 1 );
514 TMatrixD Delta ( nWeights, 1 );
515 Int_t RegUpdateCD=0; //zjh
516 Int_t RegUpdateTimes=0; //zjh
518
519 Double_t trainE = -1;
520 Double_t testE = -1;
521
522 fLastAlpha = 0.;
523
524 if(fSamplingTraining || fSamplingTesting)
525 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
526
527 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
528 timer.DrawProgressBar( 0 );
529
530 // start training cycles (epochs)
531 for (Int_t i = 0; i < nEpochs; i++) {
532
533 if (fExitFromTraining) break;
534 fIPyCurrentIter = i;
535 if (Float_t(i)/nEpochs < fSamplingEpoch) {
536 if ((i+1)%fTestRate == 0 || (i == 0)) {
537 if (fSamplingTraining) {
538 Data()->SetCurrentType( Types::kTraining );
539 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
540 Data()->CreateSampling();
541 }
542 if (fSamplingTesting) {
543 Data()->SetCurrentType( Types::kTesting );
544 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
545 Data()->CreateSampling();
546 }
547 }
548 }
549 else {
550 Data()->SetCurrentType( Types::kTraining );
551 Data()->InitSampling(1.0,1.0);
552 Data()->SetCurrentType( Types::kTesting );
553 Data()->InitSampling(1.0,1.0);
554 }
555 Data()->SetCurrentType( Types::kTraining );
556
557 //zjh
558 if (fUseRegulator) {
559 UpdatePriors();
560 RegUpdateCD++;
561 }
562 //zjh
563
564 SetGammaDelta( Gamma, Delta, buffer );
565
566 if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
567 SteepestDir( Dir );
568 Hessian.UnitMatrix();
569 RegUpdateCD=0; //zjh
570 }
571 else {
572 if (GetHessian( Hessian, Gamma, Delta )) {
573 SteepestDir( Dir );
574 Hessian.UnitMatrix();
575 RegUpdateCD=0; //zjh
576 }
577 else SetDir( Hessian, Dir );
578 }
579
580 Double_t dError=0; //zjh
581 if (DerivDir( Dir ) > 0) {
582 SteepestDir( Dir );
583 Hessian.UnitMatrix();
584 RegUpdateCD=0; //zjh
585 }
586 if (LineSearch( Dir, buffer, &dError )) { //zjh
587 Hessian.UnitMatrix();
588 SteepestDir( Dir );
589 RegUpdateCD=0; //zjh
590 if (LineSearch(Dir, buffer, &dError)) { //zjh
591 i = nEpochs;
592 Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
593 }
594 }
595
596 //zjh+
597 if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
599
600 if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
601 Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
602 UpdateRegulators();
603 Hessian.UnitMatrix();
604 RegUpdateCD=0;
606 AccuError=0;
607 }
608 //zjh-
609
610 // monitor convergence of training and control sample
611 if ((i+1)%fTestRate == 0) {
612 //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
613 //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
614 trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
615 testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
616 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
617 if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
618 {
619 fEstimatorHistTrain->Fill( i+1, trainE );
620 fEstimatorHistTest ->Fill( i+1, testE );
621 }
623 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
624 success = kTRUE;
625 }
626 Data()->EventResult( success );
627
628 SetCurrentValue( testE );
629 if (HasConverged()) {
630 if (Float_t(i)/nEpochs < fSamplingEpoch) {
631 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
632 i = newEpoch;
633 ResetConvergenceCounter();
634 }
635 else break;
636 }
637 }
638
639 // draw progress
640 TString convText = TString::Format( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
641 if (fSteps > 0) {
642 Float_t progress = 0;
643 if (Float_t(i)/nEpochs < fSamplingEpoch)
644 // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
645 progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
646 else
647 {
648 // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
649 progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
650 }
651 Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
652 if (progress2>progress) progress=progress2; //zjh
653 timer.DrawProgressBar( Int_t(progress), convText );
654 }
655 else {
656 Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
657 if (progress<i) progress=i; //zjh
658 timer.DrawProgressBar( progress, convText ); //zjh
659 }
660
661 // some verbose output
662 if (fgPRINT_SEQ) {
663 PrintNetwork();
664 WaitForKeyboard();
665 }
666 }
667}
668
669////////////////////////////////////////////////////////////////////////////////
670
671void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
672{
673 Int_t nWeights = fSynapses->GetEntriesFast();
674
675 Int_t IDX = 0;
676 Int_t nSynapses = fSynapses->GetEntriesFast();
677 for (Int_t i=0;i<nSynapses;i++) {
678 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
679 Gamma[IDX++][0] = -synapse->GetDEDw();
680 }
681
682 for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
683
684 ComputeDEDw();
685
686 IDX = 0;
687 for (Int_t i=0;i<nSynapses;i++)
688 {
689 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
690 Gamma[IDX++][0] += synapse->GetDEDw();
691 }
692}
693
694////////////////////////////////////////////////////////////////////////////////
695
697{
698 Int_t nSynapses = fSynapses->GetEntriesFast();
699 for (Int_t i=0;i<nSynapses;i++) {
700 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
701 synapse->SetDEDw( 0.0 );
702 }
703
704 Int_t nEvents = GetNEvents();
705 Int_t nPosEvents = nEvents;
706 for (Int_t i=0;i<nEvents;i++) {
707
708 const Event* ev = GetEvent(i);
709 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
710 && (Data()->GetCurrentType() == Types::kTraining)){
711 --nPosEvents;
712 continue;
713 }
714
715 SimulateEvent( ev );
716
717 for (Int_t j=0;j<nSynapses;j++) {
718 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
719 synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
720 }
721 }
722
723 for (Int_t i=0;i<nSynapses;i++) {
724 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
725 Double_t DEDw=synapse->GetDEDw(); //zjh
726 if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
727 synapse->SetDEDw( DEDw / nPosEvents ); //zjh
728 }
729}
730
731////////////////////////////////////////////////////////////////////////////////
732
734{
735 Double_t eventWeight = ev->GetWeight();
736
737 ForceNetworkInputs( ev );
738 ForceNetworkCalculations();
739
740 if (DoRegression()) {
741 UInt_t ntgt = DataInfo().GetNTargets();
742 for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
743 Double_t desired = ev->GetTarget(itgt);
744 Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
745 GetOutputNeuron( itgt )->SetError(error);
746 }
747 } else if (DoMulticlass()) {
748 UInt_t nClasses = DataInfo().GetNClasses();
749 UInt_t cls = ev->GetClass();
750 for (UInt_t icls = 0; icls < nClasses; icls++) {
751 Double_t desired = ( cls==icls ? 1.0 : 0.0 );
752 Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
753 GetOutputNeuron( icls )->SetError(error);
754 }
755 } else {
756 Double_t desired = GetDesiredOutput( ev );
757 Double_t error=-1; //zjh
758 if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
759 else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
760 GetOutputNeuron()->SetError(error);
761 }
762
763 CalculateNeuronDeltas();
764 for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
765 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
766 synapse->InitDelta();
767 synapse->CalculateDelta();
768 }
769}
770
771////////////////////////////////////////////////////////////////////////////////
772
774{
775 Int_t IDX = 0;
776 Int_t nSynapses = fSynapses->GetEntriesFast();
777
778 for (Int_t i=0;i<nSynapses;i++) {
779 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
780 Dir[IDX++][0] = -synapse->GetDEDw();
781 }
782}
783
784////////////////////////////////////////////////////////////////////////////////
785
787{
789 if ((Double_t) gd[0][0] == 0.) return kTRUE;
790 TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
791 TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
793 Double_t a = 1 / (Double_t) gd[0][0];
794 Double_t f = 1 + ((Double_t)gHg[0][0]*a);
796 res *= f;
799 res *= a;
800 Hessian += res;
801
802 return kFALSE;
803}
804
805////////////////////////////////////////////////////////////////////////////////
806
808{
809 Int_t IDX = 0;
810 Int_t nSynapses = fSynapses->GetEntriesFast();
812
813 for (Int_t i=0;i<nSynapses;i++) {
814 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
815 DEDw[IDX++][0] = synapse->GetDEDw();
816 }
817
818 dir = Hessian * DEDw;
819 for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
820}
821
822////////////////////////////////////////////////////////////////////////////////
823
825{
826 Int_t IDX = 0;
827 Int_t nSynapses = fSynapses->GetEntriesFast();
828 Double_t Result = 0.0;
829
830 for (Int_t i=0;i<nSynapses;i++) {
831 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
832 Result += Dir[IDX++][0] * synapse->GetDEDw();
833 }
834 return Result;
835}
836
837////////////////////////////////////////////////////////////////////////////////
838
840{
841 Int_t IDX = 0;
842 Int_t nSynapses = fSynapses->GetEntriesFast();
844
845 std::vector<Double_t> Origin(nWeights);
846 for (Int_t i=0;i<nSynapses;i++) {
847 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
848 Origin[i] = synapse->GetWeight();
849 }
850
851 Double_t err1 = GetError();
853 Double_t alpha1 = 0.;
854 Double_t alpha2 = fLastAlpha;
855
856
857 if (alpha2 < 0.01) alpha2 = 0.01;
858 else if (alpha2 > 2.0) alpha2 = 2.0;
861
862 SetDirWeights( Origin, Dir, alpha2 );
863 Double_t err2 = GetError();
864 //Double_t err2 = err1;
867
868
869 if (err1 > err2) {
870 for (Int_t i=0;i<100;i++) {
871 alpha3 *= fTau;
872 SetDirWeights(Origin, Dir, alpha3);
873 err3 = GetError();
874 if (err3 > err2) {
875 bingo = kTRUE;
876 break;
877 }
878 alpha1 = alpha2;
879 err1 = err2;
880 alpha2 = alpha3;
881 err2 = err3;
882 }
883 if (!bingo) {
884 SetDirWeights(Origin, Dir, 0.);
885 return kTRUE;
886 }
887 }
888 else {
889 for (Int_t i=0;i<100;i++) {
890 alpha2 /= fTau;
891 if (i==50) {
892 Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
894 }
895 SetDirWeights(Origin, Dir, alpha2);
896 err2 = GetError();
897 if (err1 > err2) {
898 bingo = kTRUE;
899 break;
900 }
901 alpha3 = alpha2;
902 err3 = err2;
903 }
904 if (!bingo) {
905 SetDirWeights(Origin, Dir, 0.);
906 Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
907 fLastAlpha = 0.05;
908 return kTRUE;
909 }
910 }
911
912 if (alpha1>0 && alpha2>0 && alpha3 > 0) {
913 fLastAlpha = 0.5 * (alpha1 + alpha3 -
914 (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
915 - ( err2 - err1 ) / (alpha2 - alpha1 )));
916 }
917 else {
918 fLastAlpha = alpha2;
919 }
920
921 fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
922
923 SetDirWeights(Origin, Dir, fLastAlpha);
924
925 // leaving these lines uncommented is a heavy price to pay for only a warning message
926 // (which shouldn't appear anyway)
927 // --> about 15% of time is spent in the final GetError().
928 //
929 Double_t finalError = GetError();
930 if (finalError > err1) {
931 Log() << kWARNING << "Line search increased error! Something is wrong."
932 << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
933 << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
934 }
935
936 for (Int_t i=0;i<nSynapses;i++) {
937 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
938 buffer[IDX] = synapse->GetWeight() - Origin[IDX];
939 IDX++;
940 }
941
942 if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
943
944 return kFALSE;
945}
946
947////////////////////////////////////////////////////////////////////////////////
948
949void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
950{
951 Int_t IDX = 0;
952 Int_t nSynapses = fSynapses->GetEntriesFast();
953
954 for (Int_t i=0;i<nSynapses;i++) {
955 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
956 synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
957 IDX++;
958 }
959 if (fUseRegulator) UpdatePriors();//zjh
960}
961
962
963////////////////////////////////////////////////////////////////////////////////
964
966{
967 Int_t nEvents = GetNEvents();
968 UInt_t ntgts = GetNTargets();
969 Double_t Result = 0.;
970
971 for (Int_t i=0;i<nEvents;i++) {
972 const Event* ev = GetEvent(i);
973
974 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
975 && (Data()->GetCurrentType() == Types::kTraining)){
976 continue;
977 }
978 SimulateEvent( ev );
979
980 Double_t error = 0.;
981 if (DoRegression()) {
982 for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
983 error += GetMSEErr( ev, itgt );//zjh
984 }
985 } else if ( DoMulticlass() ){
986 for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
987 error += GetMSEErr( ev, icls );
988 }
989 } else {
990 if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
991 else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
992 }
993 Result += error * ev->GetWeight();
994 }
995 if (fUseRegulator) Result+=fPrior; //zjh
996 if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
997 return Result;
998}
999
1000////////////////////////////////////////////////////////////////////////////////
1001
1003{
1004 Double_t error = 0;
1005 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1006 Double_t target = 0;
1007 if (DoRegression()) target = ev->GetTarget( index );
1008 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1009 else target = GetDesiredOutput( ev );
1010
1011 error = 0.5*(output-target)*(output-target); //zjh
1012
1013 return error;
1014
1015}
1016
1017////////////////////////////////////////////////////////////////////////////////
1018
1020{
1021 Double_t error = 0;
1022 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1023 Double_t target = 0;
1024 if (DoRegression()) target = ev->GetTarget( index );
1025 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1026 else target = GetDesiredOutput( ev );
1027
1028 error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1029
1030 return error;
1031}
1032
1033////////////////////////////////////////////////////////////////////////////////
1034/// minimize estimator / train network with back propagation algorithm
1035
1037{
1038 // Timer timer( nEpochs, GetName() );
1039 Timer timer( (fSteps>0?100:nEpochs), GetName() );
1040 Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1041
1042 // create histograms for overtraining monitoring
1043 Int_t nbinTest = Int_t(nEpochs/fTestRate);
1044 if(!IsSilentFile())
1045 {
1046 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1047 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1048 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1049 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1050 }
1051 if(fSamplingTraining || fSamplingTesting)
1052 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1053
1054 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1055 timer.DrawProgressBar(0);
1056
1057 // estimators
1058 Double_t trainE = -1;
1059 Double_t testE = -1;
1060
1061 // start training cycles (epochs)
1062 for (Int_t i = 0; i < nEpochs; i++) {
1063
1064 if (fExitFromTraining) break;
1065 fIPyCurrentIter = i;
1066 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1067 if ((i+1)%fTestRate == 0 || (i == 0)) {
1068 if (fSamplingTraining) {
1069 Data()->SetCurrentType( Types::kTraining );
1070 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1071 Data()->CreateSampling();
1072 }
1073 if (fSamplingTesting) {
1074 Data()->SetCurrentType( Types::kTesting );
1075 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1076 Data()->CreateSampling();
1077 }
1078 }
1079 }
1080 else {
1081 Data()->SetCurrentType( Types::kTraining );
1082 Data()->InitSampling(1.0,1.0);
1083 Data()->SetCurrentType( Types::kTesting );
1084 Data()->InitSampling(1.0,1.0);
1085 }
1086 Data()->SetCurrentType( Types::kTraining );
1087
1088 TrainOneEpoch();
1089 DecaySynapseWeights(i >= lateEpoch);
1090
1091 // monitor convergence of training and control sample
1092 if ((i+1)%fTestRate == 0) {
1093 trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1094 testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1095 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1096 if(!IsSilentFile())
1097 {
1098 fEstimatorHistTrain->Fill( i+1, trainE );
1099 fEstimatorHistTest ->Fill( i+1, testE );
1100 }
1102 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1103 success = kTRUE;
1104 }
1105 Data()->EventResult( success );
1106
1107 SetCurrentValue( testE );
1108 if (HasConverged()) {
1109 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1110 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1111 i = newEpoch;
1112 ResetConvergenceCounter();
1113 }
1114 else {
1115 if (lateEpoch > i) lateEpoch = i;
1116 else break;
1117 }
1118 }
1119 }
1120
1121 // draw progress bar (add convergence value)
1122 TString convText = TString::Format( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1123 if (fSteps > 0) {
1124 Float_t progress = 0;
1125 if (Float_t(i)/nEpochs < fSamplingEpoch)
1126 progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1127 else
1128 progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1129
1130 timer.DrawProgressBar( Int_t(progress), convText );
1131 }
1132 else {
1133 timer.DrawProgressBar( i, convText );
1134 }
1135 }
1136}
1137
1138////////////////////////////////////////////////////////////////////////////////
1139/// train network over a single epoch/cycle of events
1140
1142{
1143 Int_t nEvents = Data()->GetNEvents();
1144
1145 // randomize the order events will be presented, important for sequential mode
1146 Int_t* index = new Int_t[nEvents];
1147 for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1148 Shuffle(index, nEvents);
1149
1150 // loop over all training events
1151 for (Int_t i = 0; i < nEvents; i++) {
1152
1153 const Event * ev = GetEvent(index[i]);
1154 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1155 && (Data()->GetCurrentType() == Types::kTraining)){
1156 continue;
1157 }
1158
1159 TrainOneEvent(index[i]);
1160
1161 // do adjustments if in batch mode
1162 if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1163 AdjustSynapseWeights();
1164 if (fgPRINT_BATCH) {
1165 PrintNetwork();
1166 WaitForKeyboard();
1167 }
1168 }
1169
1170 // debug in sequential mode
1171 if (fgPRINT_SEQ) {
1172 PrintNetwork();
1173 WaitForKeyboard();
1174 }
1175 }
1176
1177 delete[] index;
1178}
1179
1180////////////////////////////////////////////////////////////////////////////////
1181/// Input:
1182/// - index: the array to shuffle
1183/// - n: the size of the array
1184/// Output:
1185/// - index: the shuffled indexes
1186///
1187/// This method is used for sequential training
1188
1190{
1191 Int_t j, k;
1192 Int_t a = n - 1;
1193 for (Int_t i = 0; i < n; i++) {
1194 j = (Int_t) (frgen->Rndm() * a);
1195 if (j<n){ // address the 'worries' of coverity
1196 k = index[j];
1197 index[j] = index[i];
1198 index[i] = k;
1199 }
1200 }
1201}
1202
1203////////////////////////////////////////////////////////////////////////////////
1204/// decay synapse weights
1205/// in last 10 epochs, lower learning rate even more to find a good minimum
1206
1208{
1210 Int_t numSynapses = fSynapses->GetEntriesFast();
1211 for (Int_t i = 0; i < numSynapses; i++) {
1212 synapse = (TSynapse*)fSynapses->At(i);
1213 if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1214 else synapse->DecayLearningRate(fDecayRate);
1215 }
1216}
1217
1218////////////////////////////////////////////////////////////////////////////////
1219/// fast per-event training
1220
1222{
1223 GetEvent(ievt);
1224
1225 // as soon as we know how to get event weights, get that here
1226
1227 // note: the normalization of event weights will affect the choice
1228 // of learning rate, one will have to experiment to get the right value.
1229 // in general, if the "average" event weight is 1, the learning rate
1230 // should be good if set around 0.02 (a good value if all event weights are 1)
1231 Double_t eventWeight = 1.0;
1232
1233 // get the desired output of this event
1235 if (type == 0) desired = fOutput->GetMin(); // background //zjh
1236 else desired = fOutput->GetMax(); // signal //zjh
1237
1238 // force the value for each input neuron
1239 Double_t x;
1240 TNeuron* neuron;
1241
1242 for (UInt_t j = 0; j < GetNvar(); j++) {
1243 x = branchVar[j];
1244 if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1245 neuron = GetInputNeuron(j);
1246 neuron->ForceValue(x);
1247 }
1248
1249 ForceNetworkCalculations();
1250 UpdateNetwork(desired, eventWeight);
1251}
1252
1253////////////////////////////////////////////////////////////////////////////////
1254/// train network over a single event
1255/// this uses the new event model
1256
1258{
1259 // note: the normalization of event weights will affect the choice
1260 // of learning rate, one will have to experiment to get the right value.
1261 // in general, if the "average" event weight is 1, the learning rate
1262 // should be good if set around 0.02 (a good value if all event weights are 1)
1263
1264 const Event * ev = GetEvent(ievt);
1265 Double_t eventWeight = ev->GetWeight();
1266 ForceNetworkInputs( ev );
1267 ForceNetworkCalculations();
1268 if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1269 if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1270 else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1271}
1272
1273////////////////////////////////////////////////////////////////////////////////
1274/// get the desired output of this event
1275
1277{
1278 return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1279}
1280
1281////////////////////////////////////////////////////////////////////////////////
1282/// update the network based on how closely
1283/// the output matched the desired output
1284
1286{
1287 Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1288 if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1289 else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1290 else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1291 error *= eventWeight;
1292 GetOutputNeuron()->SetError(error);
1293 CalculateNeuronDeltas();
1294 UpdateSynapses();
1295}
1296
1297////////////////////////////////////////////////////////////////////////////////
1298/// update the network based on how closely
1299/// the output matched the desired output
1300
1301void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1302{
1303 // Norm for softmax
1304 Double_t norm = 0.;
1305 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1306 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1307 norm += TMath::Exp(act);
1308 }
1309
1310 // Get output of network, and apply softmax
1311 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1312 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1314 Double_t error = output - desired.at(i);
1315 error *= eventWeight;
1316 GetOutputNeuron(i)->SetError(error);
1317 }
1318
1319 // Do backpropagation
1320 CalculateNeuronDeltas();
1321 UpdateSynapses();
1322}
1323
1324////////////////////////////////////////////////////////////////////////////////
1325/// have each neuron calculate its delta by back propagation
1326
1328{
1329 TNeuron* neuron;
1331 Int_t numLayers = fNetwork->GetEntriesFast();
1333
1334 // step backwards through the network (back propagation)
1335 // deltas calculated starting at output layer
1336 for (Int_t i = numLayers-1; i >= 0; i--) {
1337 curLayer = (TObjArray*)fNetwork->At(i);
1338 numNeurons = curLayer->GetEntriesFast();
1339
1340 for (Int_t j = 0; j < numNeurons; j++) {
1341 neuron = (TNeuron*) curLayer->At(j);
1342 neuron->CalculateDelta();
1343 }
1344 }
1345}
1346
1347////////////////////////////////////////////////////////////////////////////////
1348/// create genetics class similar to GeneticCut
1349/// give it vector of parameter ranges (parameters = weights)
1350/// link fitness function of this class to ComputeEstimator
1351/// instantiate GA (see MethodCuts)
1352/// run it
1353/// then this should exist for GA, Minuit and random sampling
1354
1356{
1357 PrintMessage("Minimizing Estimator with GA");
1358
1359 // define GA parameters
1360 fGA_preCalc = 1;
1361 fGA_SC_steps = 10;
1362 fGA_SC_rate = 5;
1363 fGA_SC_factor = 0.95;
1364 fGA_nsteps = 30;
1365
1366 // ranges
1367 std::vector<Interval*> ranges;
1368
1369 Int_t numWeights = fSynapses->GetEntriesFast();
1370 for (Int_t ivar=0; ivar< numWeights; ivar++) {
1371 ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1372 }
1373
1374 FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1375 gf->Run();
1376
1377 Double_t estimator = CalculateEstimator();
1378 Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1379}
1380
1381////////////////////////////////////////////////////////////////////////////////
1382/// interface to the estimate
1383
1384Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1385{
1386 return ComputeEstimator( parameters );
1387}
1388
1389////////////////////////////////////////////////////////////////////////////////
1390/// this function is called by GeneticANN for GA optimization
1391
1392Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1393{
1395 Int_t numSynapses = fSynapses->GetEntriesFast();
1396
1397 for (Int_t i = 0; i < numSynapses; i++) {
1398 synapse = (TSynapse*)fSynapses->At(i);
1399 synapse->SetWeight(parameters.at(i));
1400 }
1401 if (fUseRegulator) UpdatePriors(); //zjh
1402
1403 Double_t estimator = CalculateEstimator();
1404
1405 return estimator;
1406}
1407
1408////////////////////////////////////////////////////////////////////////////////
1409/// update synapse error fields and adjust the weights (if in sequential mode)
1410
1412{
1413 TNeuron* neuron;
1416 Int_t numLayers = fNetwork->GetEntriesFast();
1417
1418 for (Int_t i = 0; i < numLayers; i++) {
1419 curLayer = (TObjArray*)fNetwork->At(i);
1420 numNeurons = curLayer->GetEntriesFast();
1421
1422 for (Int_t j = 0; j < numNeurons; j++) {
1423 neuron = (TNeuron*) curLayer->At(j);
1424 if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1425 else neuron->UpdateSynapsesSequential();
1426 }
1427 }
1428}
1429
1430////////////////////////////////////////////////////////////////////////////////
1431/// just adjust the synapse weights (should be called in batch mode)
1432
1434{
1435 TNeuron* neuron;
1438 Int_t numLayers = fNetwork->GetEntriesFast();
1439
1440 for (Int_t i = numLayers-1; i >= 0; i--) {
1441 curLayer = (TObjArray*)fNetwork->At(i);
1442 numNeurons = curLayer->GetEntriesFast();
1443
1444 for (Int_t j = 0; j < numNeurons; j++) {
1445 neuron = (TNeuron*) curLayer->At(j);
1446 neuron->AdjustSynapseWeights();
1447 }
1448 }
1449}
1450
1451////////////////////////////////////////////////////////////////////////////////
1452
1454{
1455 fPrior=0;
1456 fPriorDev.clear();
1457 Int_t nSynapses = fSynapses->GetEntriesFast();
1458 for (Int_t i=0;i<nSynapses;i++) {
1459 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1460 fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1461 fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1462 }
1463}
1464
1465////////////////////////////////////////////////////////////////////////////////
1466
1468{
1469 TMatrixD InvH(0,0);
1470 GetApproxInvHessian(InvH);
1471 Int_t numSynapses=fSynapses->GetEntriesFast();
1472 Int_t numRegulators=fRegulators.size();
1473 Float_t gamma=0,
1474 variance=1.; // Gaussian noise
1475 std::vector<Int_t> nWDP(numRegulators);
1476 std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1477 for (int i=0;i<numSynapses;i++) {
1478 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1479 Int_t idx=fRegulatorIdx[i];
1480 nWDP[idx]++;
1481 trace[idx]+=InvH[i][i];
1482 gamma+=1-fRegulators[idx]*InvH[i][i];
1483 weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1484 }
1485 if (fEstimator==kMSE) {
1486 if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1487 else variance=CalculateEstimator( Types::kTraining, 0 );
1488 }
1489
1490 //Log() << kDEBUG << Endl;
1491 for (int i=0;i<numRegulators;i++)
1492 {
1493 //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1494 fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1495 if (fRegulators[i]<0) fRegulators[i]=0;
1496 Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1497 }
1498 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1499 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1500
1501 Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1502
1503}
1504
1505////////////////////////////////////////////////////////////////////////////////
1506
1508{
1509 Int_t numSynapses=fSynapses->GetEntriesFast();
1510 InvHessian.ResizeTo( numSynapses, numSynapses );
1511 InvHessian=0;
1514 Int_t nEvents = GetNEvents();
1515 for (Int_t i=0;i<nEvents;i++) {
1516 GetEvent(i);
1517 double outputValue=GetMvaValue(); // force calculation
1518 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1519 CalculateNeuronDeltas();
1520 for (Int_t j = 0; j < numSynapses; j++){
1521 TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1522 synapses->InitDelta();
1523 synapses->CalculateDelta();
1524 sens[j][0]=sensT[0][j]=synapses->GetDelta();
1525 }
1526 if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1527 else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1528 }
1529
1530 // TVectorD eValue(numSynapses);
1531 if (regulate) {
1532 for (Int_t i = 0; i < numSynapses; i++){
1533 InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1534 }
1535 }
1536 else {
1537 for (Int_t i = 0; i < numSynapses; i++){
1538 InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1539 }
1540 }
1541
1542 InvHessian.Invert();
1543
1544}
1545
1546////////////////////////////////////////////////////////////////////////////////
1547
1549{
1550 Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1551
1552 // no hessian (old training file) or no error requested
1553 if (!fCalculateErrors || errLower==0 || errUpper==0)
1554 return MvaValue;
1555
1557 Int_t numSynapses=fSynapses->GetEntriesFast();
1558 if (fInvHessian.GetNcols()!=numSynapses) {
1559 Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1560 }
1563 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1564 //GetOutputNeuron()->SetError(1.);
1565 CalculateNeuronDeltas();
1566 for (Int_t i = 0; i < numSynapses; i++){
1567 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1568 synapses->InitDelta();
1569 synapses->CalculateDelta();
1570 sensT[0][i]=synapses->GetDelta();
1571 }
1572 sens.Transpose(sensT);
1573 TMatrixD sig=sensT*fInvHessian*sens;
1574 variance=sig[0][0];
1575 median=GetOutputNeuron()->GetValue();
1576
1577 if (variance<0) {
1578 Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1579 variance=0;
1580 }
1581 variance=sqrt(variance);
1582
1583 //upper
1584 MvaUpper=fOutput->Eval(median+variance);
1585 if(errUpper)
1587
1588 //lower
1589 MvaLower=fOutput->Eval(median-variance);
1590 if(errLower)
1592
1593 return MvaValue;
1594}
1595
1596
1597#ifdef MethodMLP_UseMinuit__
1598
1599////////////////////////////////////////////////////////////////////////////////
1600/// minimize using Minuit
1601
1602void TMVA::MethodMLP::MinuitMinimize()
1603{
1604 fNumberOfWeights = fSynapses->GetEntriesFast();
1605
1607
1608 // minuit-specific settings
1609 Double_t args[10];
1610
1611 // output level
1612 args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1613 tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1614 tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1615
1616 double w[54];
1617
1618 // init parameters
1619 for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1620 TString parName = TString::Format("w%i", ipar);
1621 tfitter->SetParameter( ipar,
1622 parName, w[ipar], 0.1, 0, 0 );
1623 }
1624
1625 // define the CFN function
1626 tfitter->SetFCN( &IFCN );
1627
1628 // define fit strategy
1629 args[0] = 2;
1630 tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1631
1632 // now do the fit !
1633 args[0] = 1e-04;
1634 tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1635
1638 if (doBetter) {
1639 args[0] = 1e-04;
1640 tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1641
1642 if (doEvenBetter) {
1643 args[0] = 500;
1644 tfitter->ExecuteCommand( "MINOS", args, 1 );
1645 }
1646 }
1647}
1648
1649////////////////////////////////////////////////////////////////////////////////
1650/// Evaluate the minimisation function
1651///
1652/// Input parameters:
1653/// - npars: number of currently variable parameters
1654/// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1655/// - fitPars: array of (constant and variable) parameters
1656/// - iflag: indicates what is to be calculated (see example below)
1657/// - grad: array of gradients
1658///
1659/// Output parameters:
1660/// - f: the calculated function value.
1661/// - grad: the (optional) vector of first derivatives).
1662
1663void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1664{
1665 ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1666}
1667
1668TTHREAD_TLS(Int_t) nc = 0;
1669TTHREAD_TLS(double) minf = 1000000;
1670
1671void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1672{
1673 // first update the weights
1674 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1675 TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1676 synapse->SetWeight(fitPars[ipar]);
1677 }
1678
1679 // now compute the estimator
1680 f = CalculateEstimator();
1681
1682 nc++;
1683 if (f < minf) minf = f;
1684 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1685 Log() << kDEBUG << Endl;
1686 Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1687}
1688
1689////////////////////////////////////////////////////////////////////////////////
1690/// global "this" pointer to be used in minuit
1691
1692TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1693{
1694 return fgThis;
1695}
1696
1697#endif
1698
1699
1700////////////////////////////////////////////////////////////////////////////////
1701/// write specific classifier response
1702
1703void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1704{
1706}
1707
1708////////////////////////////////////////////////////////////////////////////////
1709/// get help message text
1710///
1711/// typical length of text line:
1712/// "|--------------------------------------------------------------|"
1713
1715{
1716 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1718
1719 Log() << Endl;
1720 Log() << col << "--- Short description:" << colres << Endl;
1721 Log() << Endl;
1722 Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1723 Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1724 Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1725 Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1726 Log() << "signal and one background). " << Endl;
1727 Log() << Endl;
1728 Log() << col << "--- Performance optimisation:" << colres << Endl;
1729 Log() << Endl;
1730 Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1731 Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1732 Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1733 Log() << "number of input variables that have only little discrimination power. " << Endl;
1734 Log() << "" << Endl;
1735 Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1736 Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1737 Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1738 Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1739 Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1740 Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1741 Log() << "competitive results)." << Endl;
1742 Log() << Endl;
1743 Log() << col << "Overtraining: " << colres
1744 << "only the TMlpANN performs an explicit separation of the" << Endl;
1745 Log() << "full training sample into independent training and validation samples." << Endl;
1746 Log() << "We have found that in most high-energy physics applications the " << Endl;
1747 Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1748 Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1749 Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1750 Log() << "the use of validation samples would only reduce the available training" << Endl;
1751 Log() << "information. However, if the performance on the training sample is " << Endl;
1752 Log() << "found to be significantly better than the one found with the inde-" << Endl;
1753 Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1754 Log() << "are printed to standard output at the end of each training job." << Endl;
1755 Log() << Endl;
1756 Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1757 Log() << Endl;
1758 Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1759 Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
1760 Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1761 Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1762 Log() << "in favour of more neurons in the first hidden layer." << Endl;
1763 Log() << "" << Endl;
1764 Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1765 Log() << "adjustable weights is small compared to the training sample size," << Endl;
1766 Log() << "using a large number of training samples should not lead to overtraining." << Endl;
1767}
1768
#define REGISTER_METHOD(CLASS)
for example
#define d(i)
Definition RSha256.hxx:102
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
#define e(i)
Definition RSha256.hxx:103
bool Bool_t
Boolean (0=false, 1=true) (bool)
Definition RtypesCore.h:77
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t target
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
const_iterator begin() const
const_iterator end() const
<div class="legacybox"><h2>Legacy Code</h2> TFitter is a legacy interface: there will be no bug fixes...
Definition TFitter.h:19
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:878
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
Bool_t WriteOptionsReference() const
Definition Config.h:65
Class that contains all the data information.
Definition DataSetInfo.h:62
Base class for TMVA fitters.
Definition FitterBase.h:51
Fitter using a Genetic Algorithm.
The TMVA::Interval Class.
Definition Interval.h:61
Base class for all TMVA methods using artificial neural networks.
void ProcessOptions() override
do nothing specific at this moment
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
get the mva value generated by the NN
Multilayer Perceptron class built off of MethodANNBase.
Definition MethodMLP.h:69
void GetHelpMessage() const override
get help message text
void BackPropagationMinimize(Int_t nEpochs)
minimize estimator / train network with back propagation algorithm
Double_t GetMSEErr(const Event *ev, UInt_t index=0)
zjh
void MakeClassSpecific(std::ostream &, const TString &) const override
write specific classifier response
void DeclareOptions() override
define the options (their key words) that can be set in the option string
void AdjustSynapseWeights()
just adjust the synapse weights (should be called in batch mode)
void SteepestDir(TMatrixD &Dir)
void TrainOneEpoch()
train network over a single epoch/cycle of events
Bool_t GetHessian(TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta)
Double_t ComputeEstimator(std::vector< Double_t > &parameters)
this function is called by GeneticANN for GA optimization
void InitializeLearningRates()
initialize learning rates of synapses, used only by back propagation
void CalculateNeuronDeltas()
have each neuron calculate its delta by back propagation
Double_t EstimatorFunction(std::vector< Double_t > &parameters) override
interface to the estimate
Double_t DerivDir(TMatrixD &Dir)
Double_t GetCEErr(const Event *ev, UInt_t index=0)
zjh
virtual ~MethodMLP()
destructor nothing to be done
void SetDir(TMatrixD &Hessian, TMatrixD &Dir)
void Shuffle(Int_t *index, Int_t n)
Input:
void SimulateEvent(const Event *ev)
void SetDirWeights(std::vector< Double_t > &Origin, TMatrixD &Dir, Double_t alpha)
void SetGammaDelta(TMatrixD &Gamma, TMatrixD &Delta, std::vector< Double_t > &Buffer)
void UpdatePriors()
zjh
void GetApproxInvHessian(TMatrixD &InvHessian, bool regulate=true)
rank-1 approximation, neglect 2nd derivatives. //zjh
void BFGSMinimize(Int_t nEpochs)
train network with BFGS algorithm
void UpdateSynapses()
update synapse error fields and adjust the weights (if in sequential mode)
void TrainOneEvent(Int_t ievt)
train network over a single event this uses the new event model
Double_t GetDesiredOutput(const Event *ev)
get the desired output of this event
void GeneticMinimize()
create genetics class similar to GeneticCut give it vector of parameter ranges (parameters = weights)...
Double_t GetError()
void Init() override
default initializations
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets) override
MLP can handle classification with 2 classes and regression with one regression-target.
void DecaySynapseWeights(Bool_t lateEpoch)
decay synapse weights in last 10 epochs, lower learning rate even more to find a good minimum
void TrainOneEventFast(Int_t ievt, Float_t *&branchVar, Int_t &type)
fast per-event training
void UpdateNetwork(Double_t desired, Double_t eventWeight=1.0)
update the network based on how closely the output matched the desired output
MethodMLP(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor
Definition MethodMLP.cxx:89
void UpdateRegulators()
zjh
Bool_t LineSearch(TMatrixD &Dir, std::vector< Double_t > &Buffer, Double_t *dError=nullptr)
zjh
Double_t GetMvaValue(Double_t *err=nullptr, Double_t *errUpper=nullptr) override
get the mva value generated by the NN
void Train() override
Double_t CalculateEstimator(Types::ETreeType treeType=Types::kTraining, Int_t iEpoch=-1)
calculate the estimator that training is attempting to minimize
void ProcessOptions() override
process user options
Neuron class used by TMVA artificial neural network methods.
Definition TNeuron.h:49
void AdjustSynapseWeights()
adjust the pre-synapses' weights for each neuron (input neuron has no pre-synapse) this method should...
Definition TNeuron.cxx:262
void ForceValue(Double_t value)
force the value, typically for input and bias neurons
Definition TNeuron.cxx:83
void UpdateSynapsesSequential()
update the pre-synapses for each neuron (input neuron has no pre-synapse) this method should only be ...
Definition TNeuron.cxx:241
void UpdateSynapsesBatch()
update and adjust the pre-synapses for each neuron (input neuron has no pre-synapse) this method shou...
Definition TNeuron.cxx:223
void CalculateDelta()
calculate error field
Definition TNeuron.cxx:114
Synapse class used by TMVA artificial neural network methods.
Definition TSynapse.h:42
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
Double_t NormVariable(Double_t x, Double_t xmin, Double_t xmax)
normalise to output range: [-1, 1]
Definition Tools.cxx:110
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:828
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kMulticlass
Definition Types.h:129
@ kClassification
Definition Types.h:127
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
@ kDEBUG
Definition Types.h:56
virtual TMatrixTBase< Element > & UnitMatrix()
Make a unit matrix (matrix need not be a square one).
TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1) override
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
TMatrixT< Element > & Invert(Double_t *det=nullptr)
Invert the matrix and calculate its determinant.
An array of TObjects.
Definition TObjArray.h:31
Basic string class.
Definition TString.h:138
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
This is a simple weighted bidirectional connection between two neurons.
Definition TSynapse.h:20
Double_t x[n]
Definition legend1.C:17
const Int_t n
Definition legend1.C:16
Config & gConfig()
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Double_t Exp(Double_t x)
Returns the base-e exponential function of x, which is e raised to the power x.
Definition TMath.h:720
Double_t Log(Double_t x)
Returns the natural logarithm of x.
Definition TMath.h:767
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:673
static void output()