ROOT 6.16/01 Reference Guide
MethodMLP.cxx
1// @(#)root/tmva $Id$
2// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodMLP *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * ANN Multilayer Perceptron class for the discrimination of signal *
12 * from background. BFGS implementation based on TMultiLayerPerceptron *
13 * class from ROOT (http://root.cern.ch). *
14 * *
15 * Authors (alphabetical): *
16 * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18 * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19 * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20 * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21 * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22 * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25 * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26 * *
27 * Copyright (c) 2005-2011: *
28 * CERN, Switzerland *
29 * U. of Victoria, Canada *
30 * MPI-K Heidelberg, Germany *
31 * U. of Bonn, Germany *
32 * *
33 * Redistribution and use in source and binary forms, with or without *
34 * modification, are permitted according to the terms listed in LICENSE *
35 * (http://tmva.sourceforge.net/LICENSE) *
36 **********************************************************************************/
37
38/*! \class TMVA::MethodMLP
39\ingroup TMVA
40
41Multilayer Perceptron class, built on MethodANNBase
42
43*/
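// Illustrative usage sketch (hedged, for orientation only): this method is normally booked
// through the TMVA Factory rather than instantiated directly. The factory/loader objects,
// the output file and all option values below are assumptions chosen for demonstration.
//
//    TFile* outputFile = TFile::Open("TMVA_MLP.root", "RECREATE");
//    TMVA::Factory factory("TMVAClassification", outputFile,
//                          "!V:ROC:!Silent:AnalysisType=Classification");
//    TMVA::DataLoader loader("dataset");
//    // ... declare variables and add signal/background trees to 'loader' ...
//    factory.BookMethod(&loader, TMVA::Types::kMLP, "MLP",
//                       "!H:!V:NeuronType=tanh:NCycles=600:HiddenLayers=N+5:"
//                       "TrainingMethod=BP:LearningRate=0.02:DecayRate=0.01:TestRate=10");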
44
45#include "TMVA/MethodMLP.h"
46
47#include "TMVA/Config.h"
48#include "TMVA/Configurable.h"
51#include "TMVA/DataSet.h"
52#include "TMVA/DataSetInfo.h"
53#include "TMVA/FitterBase.h"
54#include "TMVA/GeneticFitter.h"
55#include "TMVA/IFitterTarget.h"
56#include "TMVA/IMethod.h"
57#include "TMVA/Interval.h"
58#include "TMVA/MethodANNBase.h"
59#include "TMVA/MsgLogger.h"
60#include "TMVA/TNeuron.h"
61#include "TMVA/TSynapse.h"
62#include "TMVA/Timer.h"
63#include "TMVA/Tools.h"
64#include "TMVA/Types.h"
65
66#include "TH1.h"
67#include "TString.h"
68#include "TTree.h"
69#include "Riostream.h"
70#include "TFitter.h"
71#include "TMatrixD.h"
72#include "TMath.h"
73#include "TFile.h"
74
75#include <cmath>
76#include <vector>
77
78#ifdef MethodMLP_UseMinuit__
79TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
80Bool_t MethodMLP_UseMinuit = kTRUE;
81#endif
82
83REGISTER_METHOD(MLP)
84
85ClassImp(TMVA::MethodMLP);
86
87 using std::vector;
88
89////////////////////////////////////////////////////////////////////////////////
90/// standard constructor
91
92TMVA::MethodMLP::MethodMLP( const TString& jobName,
93 const TString& methodTitle,
94 DataSetInfo& theData,
95 const TString& theOption)
96 : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
97 fUseRegulator(false), fCalculateErrors(false),
98 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
99 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
100 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
101 fSamplingTraining(false), fSamplingTesting(false),
102 fLastAlpha(0.0), fTau(0.),
103 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
104 fBPMode(kSequential), fBpModeS("None"),
105 fBatchSize(0), fTestRate(0), fEpochMon(false),
106 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
107 fGA_SC_rate(0), fGA_SC_factor(0.0),
108 fDeviationsFromTargets(0),
109 fWeightRange (1.0)
110{
111
112}
113
114////////////////////////////////////////////////////////////////////////////////
115/// constructor from a weight file
116
117TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
118 const TString& theWeightFile)
119 : MethodANNBase( Types::kMLP, theData, theWeightFile),
120 fUseRegulator(false), fCalculateErrors(false),
121 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
122 fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
123 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
124 fSamplingTraining(false), fSamplingTesting(false),
125 fLastAlpha(0.0), fTau(0.),
126 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
127 fBPMode(kSequential), fBpModeS("None"),
128 fBatchSize(0), fTestRate(0), fEpochMon(false),
129 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
130 fGA_SC_rate(0), fGA_SC_factor(0.0),
131 fDeviationsFromTargets(0),
132 fWeightRange (1.0)
133{
134}
135
136////////////////////////////////////////////////////////////////////////////////
137/// destructor
138/// nothing to be done
139
140TMVA::MethodMLP::~MethodMLP()
141{
142}
143
144void TMVA::MethodMLP::Train()
145{
146 Train(NumCycles());
147}
148
149
150
151////////////////////////////////////////////////////////////////////////////////
152/// MLP can handle classification with 2 classes and regression with one regression-target
153
154Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
155{
156 if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
157 if (type == Types::kMulticlass ) return kTRUE;
158 if (type == Types::kRegression ) return kTRUE;
159
160 return kFALSE;
161}
162
163////////////////////////////////////////////////////////////////////////////////
164/// default initializations
165
166void TMVA::MethodMLP::Init()
167{
168 // the minimum requirement to declare an event signal-like
169 SetSignalReferenceCut( 0.5 );
170#ifdef MethodMLP_UseMinuit__
171 fgThis = this;
172#endif
173}
174
175////////////////////////////////////////////////////////////////////////////////
176/// define the options (their key words) that can be set in the option string
177///
178/// known options:
179///
180/// - TrainingMethod <string> Training method
181/// available values are:
182/// - BP Back-Propagation <default>
183/// - GA Genetic Algorithm (takes a LONG time)
184///
185/// - LearningRate <float> NN learning rate parameter
186/// - DecayRate <float> Decay rate for learning parameter
187/// - TestRate <int> Test for overtraining performed at each #th epochs
188///
189/// - BPMode <string> Back-propagation learning mode
190/// available values are:
191/// - sequential <default>
192/// - batch
193///
194/// - BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
195/// - -1 for BatchSize=number_of_events
196
197void TMVA::MethodMLP::DeclareOptions()
198{
199 DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
200 "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
201 AddPreDefVal(TString("BP"));
202 AddPreDefVal(TString("GA"));
203 AddPreDefVal(TString("BFGS"));
204
205 DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
206 DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
207 DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
208 DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
209
210 DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
211 DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
212 DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance"," The sampling weights of events in epochs which are successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
213
214 DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
215 DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
216
217 DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
218 DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
219
220 DeclareOptionRef(fBpModeS="sequential", "BPMode",
221 "Back-propagation learning mode: sequential or batch");
222 AddPreDefVal(TString("sequential"));
223 AddPreDefVal(TString("batch"));
224
225 DeclareOptionRef(fBatchSize=-1, "BatchSize",
226 "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
227
228 DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
229 "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
230
231 DeclareOptionRef(fSteps=-1, "ConvergenceTests",
232 "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
233
234 DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
235 "Use regulator to avoid over-training"); //zjh
236 DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
237 "Maximum times of regulator update"); //zjh
238 DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
239 "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
240
241 DeclareOptionRef(fWeightRange=1.0, "WeightRange",
242 "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
243
244}
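// Option-string sketch (illustrative, all values are assumptions): the keywords declared
// above are parsed from the configuration string passed when the method is booked. For
// example, a BFGS training with the Bayesian regulator enabled could be requested roughly as
//
//    "TrainingMethod=BFGS:ResetStep=50:Tau=3:UseRegulator=True:UpdateLimit=10000:"
//    "ConvergenceImprove=1e-30:ConvergenceTests=15:TestRate=5:EpochMonitoring=False"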
245
246////////////////////////////////////////////////////////////////////////////////
247/// process user options
248
249void TMVA::MethodMLP::ProcessOptions()
250{
251 MethodANNBase::ProcessOptions();
252
253
254 if (IgnoreEventsWithNegWeightsInTraining()) {
255 Log() << kINFO
256 << "Will ignore negative events in training!"
257 << Endl;
258 }
259
260
261 if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
262 else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
263 else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
264
265 if (fBpModeS == "sequential") fBPMode = kSequential;
266 else if (fBpModeS == "batch") fBPMode = kBatch;
267
268 // InitializeLearningRates();
269
270 if (fBPMode == kBatch) {
271 Data()->SetCurrentType(Types::kTraining);
272 Int_t numEvents = Data()->GetNEvents();
273 if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
274 }
275}
276
277////////////////////////////////////////////////////////////////////////////////
278/// initialize learning rates of synapses, used only by back propagation
279
280void TMVA::MethodMLP::InitializeLearningRates()
281{
282 Log() << kDEBUG << "Initialize learning rates" << Endl;
283 TSynapse *synapse;
284 Int_t numSynapses = fSynapses->GetEntriesFast();
285 for (Int_t i = 0; i < numSynapses; i++) {
286 synapse = (TSynapse*)fSynapses->At(i);
287 synapse->SetLearningRate(fLearnRate);
288 }
289}
290
291////////////////////////////////////////////////////////////////////////////////
292/// calculate the estimator that training is attempting to minimize
293
294Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
295{
296 // sanity check
297 if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
298 Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
299 }
300
301 Types::ETreeType saveType = Data()->GetCurrentType();
302 Data()->SetCurrentType(treeType);
303
304 // if epochs are counted create monitoring histograms (only available for classification)
305 TString type = (treeType == Types::kTraining ? "train" : "test");
306 TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
307 TString nameB = name + "_B";
308 TString nameS = name + "_S";
309 Int_t nbin = 100;
310 Float_t limit = 2;
311 TH1* histS = 0;
312 TH1* histB = 0;
313 if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
314 histS = new TH1F( nameS, nameS, nbin, -limit, limit );
315 histB = new TH1F( nameB, nameB, nbin, -limit, limit );
316 }
317
318 Double_t estimator = 0;
319
320 // loop over all training events
321 Int_t nEvents = GetNEvents();
322 UInt_t nClasses = DataInfo().GetNClasses();
323 UInt_t nTgts = DataInfo().GetNTargets();
324
325
326 Float_t sumOfWeights = 0.f;
327 if( fWeightRange < 1.f ){
328 fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
329 }
330
331 for (Int_t i = 0; i < nEvents; i++) {
332
333 const Event* ev = GetEvent(i);
334
335 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
336 && (saveType == Types::kTraining)){
337 continue;
338 }
339
340 Double_t w = ev->GetWeight();
341
342 ForceNetworkInputs( ev );
343 ForceNetworkCalculations();
344
345 Double_t d = 0, v = 0;
346 if (DoRegression()) {
347 for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
348 v = GetOutputNeuron( itgt )->GetActivationValue();
349 Double_t targetValue = ev->GetTarget( itgt );
350 Double_t dt = v - targetValue;
351 d += (dt*dt);
352 }
353 estimator += d*w;
354 } else if (DoMulticlass() ) {
355 UInt_t cls = ev->GetClass();
356 if (fEstimator==kCE){
357 Double_t norm(0);
358 for (UInt_t icls = 0; icls < nClasses; icls++) {
359 Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
360 norm += exp( activationValue );
361 if(icls==cls)
362 d = exp( activationValue );
363 }
364 d = -TMath::Log(d/norm);
365 }
366 else{
367 for (UInt_t icls = 0; icls < nClasses; icls++) {
368 Double_t desired = (icls==cls) ? 1.0 : 0.0;
369 v = GetOutputNeuron( icls )->GetActivationValue();
370 d = (desired-v)*(desired-v);
371 }
372 }
373 estimator += d*w; //zjh
374 } else {
375 Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
376 v = GetOutputNeuron()->GetActivationValue();
377 if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
378 else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
379 estimator += d*w; //zjh
380 }
381
382 if( fDeviationsFromTargets )
383 fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
384
385 sumOfWeights += w;
386
387
388 // fill monitoring histograms
389 if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
390 else if (histB != 0) histB->Fill( float(v), float(w) );
391 }
392
393
394 if( fDeviationsFromTargets ) {
395 std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
396
397 Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
398 estimator = 0.f;
399
400 Float_t weightRangeCut = fWeightRange*sumOfWeights;
401 Float_t weightSum = 0.f;
402 for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
403 float deviation = (*itDev).first;
404 float devWeight = (*itDev).second;
405 weightSum += devWeight; // add the weight of this event
406 if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
407 estimator += devWeight*deviation;
408 }
409 }
410
411 sumOfWeights = sumOfWeightsInRange;
412 delete fDeviationsFromTargets;
413 }
414
415 if (histS != 0) fEpochMonHistS.push_back( histS );
416 if (histB != 0) fEpochMonHistB.push_back( histB );
417
418 //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
419 //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
420 //else estimator = estimator*0.5/Float_t(nEvents);
421 if (DoRegression()) estimator = estimator/Float_t(sumOfWeights);
422 else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
423 else estimator = estimator/Float_t(sumOfWeights);
424
425
426 //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
427
428 Data()->SetCurrentType( saveType );
429
430 // provide epoch-wise monitoring
431 if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
432 CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
433 }
434
435 return estimator;
436}
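// Reading of the per-event terms accumulated above (comment for orientation; v = network
// output, t = desired value/target, w = event weight):
//
//    regression:               d = sum_targets (v - t)^2
//    classification (kMSE):    d = (t - v)^2
//    classification (kCE):     d = -2 * ( t*ln(v) + (1 - t)*ln(1 - v) )
//    multiclass (kCE):         d = -ln( exp(a_cls) / sum_c exp(a_c) )   (softmax cross-entropy)
//
// The returned estimator is sum_events d*w divided by the sum of weights, where the sum may
// be restricted to the smallest deviations when WeightRange < 1.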
437
438////////////////////////////////////////////////////////////////////////////////
439
440void TMVA::MethodMLP::Train(Int_t nEpochs)
441{
442 if (fNetwork == 0) {
443 //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
444 Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
445 SetAnalysisType(GetAnalysisType());
446 }
447 Log() << kDEBUG << "reinitialize learning rates" << Endl;
448 InitializeLearningRates();
449 Log() << kHEADER;
450 PrintMessage("Training Network");
451 Log() << Endl;
452 Int_t nEvents=GetNEvents();
453 Int_t nSynapses=fSynapses->GetEntriesFast();
454 if (nSynapses>nEvents)
455 Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
456
457 fIPyMaxIter = nEpochs;
458 if (fInteractive && fInteractive->NotInitialized()){
459 std::vector<TString> titles = {"Error on training set", "Error on test set"};
460 fInteractive->Init(titles);
461 }
462
463#ifdef MethodMLP_UseMinuit__
464 if (useMinuit) MinuitMinimize();
465#else
466 if (fTrainingMethod == kGA) GeneticMinimize();
467 else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
468 else BackPropagationMinimize(nEpochs);
469#endif
470
471 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
472 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
473 if (fUseRegulator){
474 Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
475 UpdateRegulators();
476 Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
477 }
478
479 if( fCalculateErrors || fUseRegulator )
480 {
481 Int_t numSynapses=fSynapses->GetEntriesFast();
482 fInvHessian.ResizeTo(numSynapses,numSynapses);
483 GetApproxInvHessian( fInvHessian ,false);
484 }
485 ExitFromTraining();
486}
487
488////////////////////////////////////////////////////////////////////////////////
489/// train network with BFGS algorithm
490
491void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
492{
493 Timer timer( (fSteps>0?100:nEpochs), GetName() );
494
495 // create histograms for overtraining monitoring
496 Int_t nbinTest = Int_t(nEpochs/fTestRate);
497 if(!IsSilentFile())
498 {
499 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
500 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
501 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
502 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
503 }
504
505 Int_t nSynapses = fSynapses->GetEntriesFast();
506 Int_t nWeights = nSynapses;
507
508 for (Int_t i=0;i<nSynapses;i++) {
509 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
510 synapse->SetDEDw(0.0);
511 }
512
513 std::vector<Double_t> buffer( nWeights );
514 for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
515
516 TMatrixD Dir ( nWeights, 1 );
517 TMatrixD Hessian ( nWeights, nWeights );
518 TMatrixD Gamma ( nWeights, 1 );
519 TMatrixD Delta ( nWeights, 1 );
520 Int_t RegUpdateCD=0; //zjh
521 Int_t RegUpdateTimes=0; //zjh
522 Double_t AccuError=0;
523
524 Double_t trainE = -1;
525 Double_t testE = -1;
526
527 fLastAlpha = 0.;
528
529 if(fSamplingTraining || fSamplingTesting)
530 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
531
532 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
533 timer.DrawProgressBar( 0 );
534
535 // start training cycles (epochs)
536 for (Int_t i = 0; i < nEpochs; i++) {
537
538 if (fExitFromTraining) break;
539 fIPyCurrentIter = i;
540 if (Float_t(i)/nEpochs < fSamplingEpoch) {
541 if ((i+1)%fTestRate == 0 || (i == 0)) {
542 if (fSamplingTraining) {
543 Data()->SetCurrentType( Types::kTraining );
544 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
545 Data()->CreateSampling();
546 }
547 if (fSamplingTesting) {
548 Data()->SetCurrentType( Types::kTesting );
549 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
550 Data()->CreateSampling();
551 }
552 }
553 }
554 else {
555 Data()->SetCurrentType( Types::kTraining );
556 Data()->InitSampling(1.0,1.0);
557 Data()->SetCurrentType( Types::kTesting );
558 Data()->InitSampling(1.0,1.0);
559 }
560 Data()->SetCurrentType( Types::kTraining );
561
562 //zjh
563 if (fUseRegulator) {
564 UpdatePriors();
565 RegUpdateCD++;
566 }
567 //zjh
568
569 SetGammaDelta( Gamma, Delta, buffer );
570
571 if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
572 SteepestDir( Dir );
573 Hessian.UnitMatrix();
574 RegUpdateCD=0; //zjh
575 }
576 else {
577 if (GetHessian( Hessian, Gamma, Delta )) {
578 SteepestDir( Dir );
579 Hessian.UnitMatrix();
580 RegUpdateCD=0; //zjh
581 }
582 else SetDir( Hessian, Dir );
583 }
584
585 Double_t dError=0; //zjh
586 if (DerivDir( Dir ) > 0) {
587 SteepestDir( Dir );
588 Hessian.UnitMatrix();
589 RegUpdateCD=0; //zjh
590 }
591 if (LineSearch( Dir, buffer, &dError )) { //zjh
592 Hessian.UnitMatrix();
593 SteepestDir( Dir );
594 RegUpdateCD=0; //zjh
595 if (LineSearch(Dir, buffer, &dError)) { //zjh
596 i = nEpochs;
597 Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
598 }
599 }
600
601 //zjh+
602 if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
603 AccuError+=dError;
604
605 if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
606 Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
607 UpdateRegulators();
608 Hessian.UnitMatrix();
609 RegUpdateCD=0;
610 RegUpdateTimes++;
611 AccuError=0;
612 }
613 //zjh-
614
615 // monitor convergence of training and control sample
616 if ((i+1)%fTestRate == 0) {
617 //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
618 //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
619 trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
620 testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
621 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
622 if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
623 {
624 fEstimatorHistTrain->Fill( i+1, trainE );
625 fEstimatorHistTest ->Fill( i+1, testE );
626 }
627 Bool_t success = kFALSE;
628 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
629 success = kTRUE;
630 }
631 Data()->EventResult( success );
632
633 SetCurrentValue( testE );
634 if (HasConverged()) {
635 if (Float_t(i)/nEpochs < fSamplingEpoch) {
636 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
637 i = newEpoch;
638 ResetConvergenceCounter();
639 }
640 else break;
641 }
642 }
643
644 // draw progress
645 TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
646 if (fSteps > 0) {
647 Float_t progress = 0;
648 if (Float_t(i)/nEpochs < fSamplingEpoch)
649 // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
650 progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
651 else
652 {
653 // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
654 progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
655 }
656 Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
657 if (progress2>progress) progress=progress2; //zjh
658 timer.DrawProgressBar( Int_t(progress), convText );
659 }
660 else {
661 Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
662 if (progress<i) progress=i; //zjh
663 timer.DrawProgressBar( progress, convText ); //zjh
664 }
665
666 // some verbose output
667 if (fgPRINT_SEQ) {
668 PrintNetwork();
669 WaitForKeyboard();
670 }
671 }
672}
673
674////////////////////////////////////////////////////////////////////////////////
675
676void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
677{
678 Int_t nWeights = fSynapses->GetEntriesFast();
679
680 Int_t IDX = 0;
681 Int_t nSynapses = fSynapses->GetEntriesFast();
682 for (Int_t i=0;i<nSynapses;i++) {
683 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
684 Gamma[IDX++][0] = -synapse->GetDEDw();
685 }
686
687 for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
688
689 ComputeDEDw();
690
691 IDX = 0;
692 for (Int_t i=0;i<nSynapses;i++)
693 {
694 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
695 Gamma[IDX++][0] += synapse->GetDEDw();
696 }
697}
698
699////////////////////////////////////////////////////////////////////////////////
700
701void TMVA::MethodMLP::ComputeDEDw()
702{
703 Int_t nSynapses = fSynapses->GetEntriesFast();
704 for (Int_t i=0;i<nSynapses;i++) {
705 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
706 synapse->SetDEDw( 0.0 );
707 }
708
709 Int_t nEvents = GetNEvents();
710 Int_t nPosEvents = nEvents;
711 for (Int_t i=0;i<nEvents;i++) {
712
713 const Event* ev = GetEvent(i);
714 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
715 && (Data()->GetCurrentType() == Types::kTraining)){
716 --nPosEvents;
717 continue;
718 }
719
720 SimulateEvent( ev );
721
722 for (Int_t j=0;j<nSynapses;j++) {
723 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
724 synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
725 }
726 }
727
728 for (Int_t i=0;i<nSynapses;i++) {
729 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
730 Double_t DEDw=synapse->GetDEDw(); //zjh
731 if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
732 synapse->SetDEDw( DEDw / nPosEvents ); //zjh
733 }
734}
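// Orientation note for the loop above: the per-synapse gradients dE/dw are accumulated over
// all accepted events by SimulateEvent() and then normalised by the number of contributing
// events; with the regulator active, the prior derivative fPriorDev[i] = alpha_i * w_i is
// added first, i.e. effectively
//
//    dE/dw_i  =  ( sum_events dE/dw_i(event) + alpha_i * w_i ) / nPosEvents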
735
736////////////////////////////////////////////////////////////////////////////////
737
738void TMVA::MethodMLP::SimulateEvent( const Event* ev )
739{
740 Double_t eventWeight = ev->GetWeight();
741
742 ForceNetworkInputs( ev );
743 ForceNetworkCalculations();
744
745 if (DoRegression()) {
746 UInt_t ntgt = DataInfo().GetNTargets();
747 for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
748 Double_t desired = ev->GetTarget(itgt);
749 Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
750 GetOutputNeuron( itgt )->SetError(error);
751 }
752 } else if (DoMulticlass()) {
753 UInt_t nClasses = DataInfo().GetNClasses();
754 UInt_t cls = ev->GetClass();
755 for (UInt_t icls = 0; icls < nClasses; icls++) {
756 Double_t desired = ( cls==icls ? 1.0 : 0.0 );
757 Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
758 GetOutputNeuron( icls )->SetError(error);
759 }
760 } else {
761 Double_t desired = GetDesiredOutput( ev );
762 Double_t error=-1; //zjh
763 if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
764 else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
765 GetOutputNeuron()->SetError(error);
766 }
767
768 CalculateNeuronDeltas();
769 for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
770 TSynapse *synapse = (TSynapse*)fSynapses->At(j);
771 synapse->InitDelta();
772 synapse->CalculateDelta();
773 }
774}
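// Orientation note on the output-neuron errors set above: for the cross-entropy estimator
// the per-event loss is E = -( t*ln(y) + (1-t)*ln(1-y) ), whose derivative with respect to
// the output y is (y - t) / ( y*(1 - y) ); for binary targets (t = 0 or 1) this reduces to
// -1/(y - 1 + t), which is the kCE expression used here. For kMSE the derivative is (y - t).
// Both are scaled by the event weight before the synapse deltas are computed.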
775
776////////////////////////////////////////////////////////////////////////////////
777
778void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
779{
780 Int_t IDX = 0;
781 Int_t nSynapses = fSynapses->GetEntriesFast();
782
783 for (Int_t i=0;i<nSynapses;i++) {
784 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
785 Dir[IDX++][0] = -synapse->GetDEDw();
786 }
787}
788
789////////////////////////////////////////////////////////////////////////////////
790
791Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
792{
793 TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
794 if ((Double_t) gd[0][0] == 0.) return kTRUE;
795 TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
796 TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
797 TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
798 Double_t a = 1 / (Double_t) gd[0][0];
799 Double_t f = 1 + ((Double_t)gHg[0][0]*a);
800 TMatrixD res( TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed, Delta)) );
801 res *= f;
802 res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
803 TMatrixD(TMatrixD::kTransposed, Delta)));
804 res *= a;
805 Hessian += res;
806
807 return kFALSE;
808}
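// One way to read the update implemented above: with delta = Delta (the last weight step),
// gamma = Gamma (the change of the gradient) and a = 1/(gamma^T delta), the approximate
// inverse Hessian is updated in BFGS fashion,
//
//    H  <-  H + a * [ (1 + a * gamma^T H gamma) * delta delta^T
//                     - delta (gamma^T H) - (H gamma) delta^T ]
//
// When gamma^T delta vanishes the routine returns kTRUE, and the caller falls back to the
// steepest-descent direction with a reset (unit) Hessian.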
809
810////////////////////////////////////////////////////////////////////////////////
811
812void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
813{
814 Int_t IDX = 0;
815 Int_t nSynapses = fSynapses->GetEntriesFast();
816 TMatrixD DEDw(nSynapses, 1);
817
818 for (Int_t i=0;i<nSynapses;i++) {
819 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
820 DEDw[IDX++][0] = synapse->GetDEDw();
821 }
822
823 dir = Hessian * DEDw;
824 for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
825}
826
827////////////////////////////////////////////////////////////////////////////////
828
829Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
830{
831 Int_t IDX = 0;
832 Int_t nSynapses = fSynapses->GetEntriesFast();
833 Double_t Result = 0.0;
834
835 for (Int_t i=0;i<nSynapses;i++) {
836 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
837 Result += Dir[IDX++][0] * synapse->GetDEDw();
838 }
839 return Result;
840}
841
842////////////////////////////////////////////////////////////////////////////////
843
844Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
845{
846 Int_t IDX = 0;
847 Int_t nSynapses = fSynapses->GetEntriesFast();
848 Int_t nWeights = nSynapses;
849
850 std::vector<Double_t> Origin(nWeights);
851 for (Int_t i=0;i<nSynapses;i++) {
852 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
853 Origin[i] = synapse->GetWeight();
854 }
855
856 Double_t err1 = GetError();
857 Double_t errOrigin=err1;//zjh
858 Double_t alpha1 = 0.;
859 Double_t alpha2 = fLastAlpha;
860
861
862 if (alpha2 < 0.01) alpha2 = 0.01;
863 else if (alpha2 > 2.0) alpha2 = 2.0;
864 Double_t alpha_original = alpha2;
865 Double_t alpha3 = alpha2;
866
867 SetDirWeights( Origin, Dir, alpha2 );
868 Double_t err2 = GetError();
869 //Double_t err2 = err1;
870 Double_t err3 = err2;
871 Bool_t bingo = kFALSE;
872
873
874 if (err1 > err2) {
875 for (Int_t i=0;i<100;i++) {
876 alpha3 *= fTau;
877 SetDirWeights(Origin, Dir, alpha3);
878 err3 = GetError();
879 if (err3 > err2) {
880 bingo = kTRUE;
881 break;
882 }
883 alpha1 = alpha2;
884 err1 = err2;
885 alpha2 = alpha3;
886 err2 = err3;
887 }
888 if (!bingo) {
889 SetDirWeights(Origin, Dir, 0.);
890 return kTRUE;
891 }
892 }
893 else {
894 for (Int_t i=0;i<100;i++) {
895 alpha2 /= fTau;
896 if (i==50) {
897 Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
898 alpha2 = -alpha_original;
899 }
900 SetDirWeights(Origin, Dir, alpha2);
901 err2 = GetError();
902 if (err1 > err2) {
903 bingo = kTRUE;
904 break;
905 }
906 alpha3 = alpha2;
907 err3 = err2;
908 }
909 if (!bingo) {
910 SetDirWeights(Origin, Dir, 0.);
911 Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
912 fLastAlpha = 0.05;
913 return kTRUE;
914 }
915 }
916
917 if (alpha1>0 && alpha2>0 && alpha3 > 0) {
918 fLastAlpha = 0.5 * (alpha1 + alpha3 -
919 (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
920 - ( err2 - err1 ) / (alpha2 - alpha1 )));
921 }
922 else {
923 fLastAlpha = alpha2;
924 }
925
926 fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
927
928 SetDirWeights(Origin, Dir, fLastAlpha);
929
930 // leaving these lines uncommented is a heavy price to pay for only a warning message
931 // (which shouldn't appear anyway)
932 // --> about 15% of time is spent in the final GetError().
933 //
934 Double_t finalError = GetError();
935 if (finalError > err1) {
936 Log() << kWARNING << "Line search increased error! Something is wrong."
937 << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
938 << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
939 }
940
941 for (Int_t i=0;i<nSynapses;i++) {
942 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
943 buffer[IDX] = synapse->GetWeight() - Origin[IDX];
944 IDX++;
945 }
946
947 if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
948
949 return kFALSE;
950}
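// Orientation note: once the minimum has been bracketed by (alpha1,err1), (alpha2,err2) and
// (alpha3,err3), fLastAlpha above is the vertex of the parabola through the three points,
//
//    alpha_min = 0.5 * ( alpha1 + alpha3
//                - (err3 - err1) / ( (err3 - err2)/(alpha3 - alpha2)
//                                    - (err2 - err1)/(alpha2 - alpha1) ) )
//
// and the weights are finally set to Origin + alpha_min * Dir before the last error check.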
951
952////////////////////////////////////////////////////////////////////////////////
953
954void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
955{
956 Int_t IDX = 0;
957 Int_t nSynapses = fSynapses->GetEntriesFast();
958
959 for (Int_t i=0;i<nSynapses;i++) {
960 TSynapse *synapse = (TSynapse*)fSynapses->At(i);
961 synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
962 IDX++;
963 }
964 if (fUseRegulator) UpdatePriors();//zjh
965}
966
967
968////////////////////////////////////////////////////////////////////////////////
969
970Double_t TMVA::MethodMLP::GetError()
971{
972 Int_t nEvents = GetNEvents();
973 UInt_t ntgts = GetNTargets();
974 Double_t Result = 0.;
975
976 for (Int_t i=0;i<nEvents;i++) {
977 const Event* ev = GetEvent(i);
978
979 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
980 && (Data()->GetCurrentType() == Types::kTraining)){
981 continue;
982 }
983 SimulateEvent( ev );
984
985 Double_t error = 0.;
986 if (DoRegression()) {
987 for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
988 error += GetMSEErr( ev, itgt );//zjh
989 }
990 } else if ( DoMulticlass() ){
991 for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
992 error += GetMSEErr( ev, icls );
993 }
994 } else {
995 if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
996 else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
997 }
998 Result += error * ev->GetWeight();
999 }
1000 if (fUseRegulator) Result+=fPrior; //zjh
1001 if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
1002 return Result;
1003}
1004
1005////////////////////////////////////////////////////////////////////////////////
1006
1007Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
1008{
1009 Double_t error = 0;
1010 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1011 Double_t target = 0;
1012 if (DoRegression()) target = ev->GetTarget( index );
1013 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1014 else target = GetDesiredOutput( ev );
1015
1016 error = 0.5*(output-target)*(output-target); //zjh
1017
1018 return error;
1019
1020}
1021
1022////////////////////////////////////////////////////////////////////////////////
1023
1024Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
1025{
1026 Double_t error = 0;
1027 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1028 Double_t target = 0;
1029 if (DoRegression()) target = ev->GetTarget( index );
1030 else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1031 else target = GetDesiredOutput( ev );
1032
1033 error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1034
1035 return error;
1036}
1037
1038////////////////////////////////////////////////////////////////////////////////
1039/// minimize estimator / train network with back propagation algorithm
1040
1041void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
1042{
1043 // Timer timer( nEpochs, GetName() );
1044 Timer timer( (fSteps>0?100:nEpochs), GetName() );
1045 Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1046
1047 // create histograms for overtraining monitoring
1048 Int_t nbinTest = Int_t(nEpochs/fTestRate);
1049 if(!IsSilentFile())
1050 {
1051 fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1052 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1053 fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1054 nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1055 }
1056 if(fSamplingTraining || fSamplingTesting)
1057 Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1058
1059 if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1060 timer.DrawProgressBar(0);
1061
1062 // estimators
1063 Double_t trainE = -1;
1064 Double_t testE = -1;
1065
1066 // start training cycles (epochs)
1067 for (Int_t i = 0; i < nEpochs; i++) {
1068
1069 if (fExitFromTraining) break;
1070 fIPyCurrentIter = i;
1071 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1072 if ((i+1)%fTestRate == 0 || (i == 0)) {
1073 if (fSamplingTraining) {
1074 Data()->SetCurrentType( Types::kTraining );
1075 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1076 Data()->CreateSampling();
1077 }
1078 if (fSamplingTesting) {
1079 Data()->SetCurrentType( Types::kTesting );
1080 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1081 Data()->CreateSampling();
1082 }
1083 }
1084 }
1085 else {
1086 Data()->SetCurrentType( Types::kTraining );
1087 Data()->InitSampling(1.0,1.0);
1088 Data()->SetCurrentType( Types::kTesting );
1089 Data()->InitSampling(1.0,1.0);
1090 }
1091 Data()->SetCurrentType( Types::kTraining );
1092
1093 TrainOneEpoch();
1094 DecaySynapseWeights(i >= lateEpoch);
1095
1096 // monitor convergence of training and control sample
1097 if ((i+1)%fTestRate == 0) {
1098 trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1099 testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1100 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1101 if(!IsSilentFile())
1102 {
1103 fEstimatorHistTrain->Fill( i+1, trainE );
1104 fEstimatorHistTest ->Fill( i+1, testE );
1105 }
1106 Bool_t success = kFALSE;
1107 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1108 success = kTRUE;
1109 }
1110 Data()->EventResult( success );
1111
1112 SetCurrentValue( testE );
1113 if (HasConverged()) {
1114 if (Float_t(i)/nEpochs < fSamplingEpoch) {
1115 Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1116 i = newEpoch;
1117 ResetConvergenceCounter();
1118 }
1119 else {
1120 if (lateEpoch > i) lateEpoch = i;
1121 else break;
1122 }
1123 }
1124 }
1125
1126 // draw progress bar (add convergence value)
1127 TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1128 if (fSteps > 0) {
1129 Float_t progress = 0;
1130 if (Float_t(i)/nEpochs < fSamplingEpoch)
1131 progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1132 else
1133 progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1134
1135 timer.DrawProgressBar( Int_t(progress), convText );
1136 }
1137 else {
1138 timer.DrawProgressBar( i, convText );
1139 }
1140 }
1141}
1142
1143////////////////////////////////////////////////////////////////////////////////
1144/// train network over a single epoch/cycle of events
1145
1146void TMVA::MethodMLP::TrainOneEpoch()
1147{
1148 Int_t nEvents = Data()->GetNEvents();
1149
1150 // randomize the order events will be presented, important for sequential mode
1151 Int_t* index = new Int_t[nEvents];
1152 for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1153 Shuffle(index, nEvents);
1154
1155 // loop over all training events
1156 for (Int_t i = 0; i < nEvents; i++) {
1157
1158 const Event * ev = GetEvent(index[i]);
1159 if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1160 && (Data()->GetCurrentType() == Types::kTraining)){
1161 continue;
1162 }
1163
1164 TrainOneEvent(index[i]);
1165
1166 // do adjustments if in batch mode
1167 if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1168 AdjustSynapseWeights();
1169 if (fgPRINT_BATCH) {
1170 PrintNetwork();
1171 WaitForKeyboard();
1172 }
1173 }
1174
1175 // debug in sequential mode
1176 if (fgPRINT_SEQ) {
1177 PrintNetwork();
1178 WaitForKeyboard();
1179 }
1180 }
1181
1182 delete[] index;
1183}
1184
1185////////////////////////////////////////////////////////////////////////////////
1186/// Input:
1187/// - index: the array to shuffle
1188/// - n: the size of the array
1189/// Output:
1190/// - index: the shuffled indexes
1191///
1192/// This method is used for sequential training
1193
1194void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
1195{
1196 Int_t j, k;
1197 Int_t a = n - 1;
1198 for (Int_t i = 0; i < n; i++) {
1199 j = (Int_t) (frgen->Rndm() * a);
1200 if (j<n){ // address the 'worries' of coverity
1201 k = index[j];
1202 index[j] = index[i];
1203 index[i] = k;
1204 }
1205 }
1206}
1207
1208////////////////////////////////////////////////////////////////////////////////
1209/// decay synapse weights
1210/// in last 10 epochs, lower learning rate even more to find a good minimum
1211
1212void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
1213{
1214 TSynapse* synapse;
1215 Int_t numSynapses = fSynapses->GetEntriesFast();
1216 for (Int_t i = 0; i < numSynapses; i++) {
1217 synapse = (TSynapse*)fSynapses->At(i);
1218 if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1219 else synapse->DecayLearningRate(fDecayRate);
1220 }
1221}
1222
1223////////////////////////////////////////////////////////////////////////////////
1224/// fast per-event training
1225
1226void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
1227{
1228 GetEvent(ievt);
1229
1230 // as soon as we know how to get event weights, get that here
1231
1232 // note: the normalization of event weights will affect the choice
1233 // of learning rate, one will have to experiment to get the right value.
1234 // in general, if the "average" event weight is 1, the learning rate
1235 // should be good if set around 0.02 (a good value if all event weights are 1)
1236 Double_t eventWeight = 1.0;
1237
1238 // get the desired output of this event
1239 Double_t desired;
1240 if (type == 0) desired = fOutput->GetMin(); // background //zjh
1241 else desired = fOutput->GetMax(); // signal //zjh
1242
1243 // force the value for each input neuron
1244 Double_t x;
1245 TNeuron* neuron;
1246
1247 for (UInt_t j = 0; j < GetNvar(); j++) {
1248 x = branchVar[j];
1249 if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1250 neuron = GetInputNeuron(j);
1251 neuron->ForceValue(x);
1252 }
1253
1254 ForceNetworkCalculations();
1255 UpdateNetwork(desired, eventWeight);
1256}
1257
1258////////////////////////////////////////////////////////////////////////////////
1259/// train network over a single event
1260/// this uses the new event model
1261
1262void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
1263{
1264 // note: the normalization of event weights will affect the choice
1265 // of learning rate, one will have to experiment to get the right value.
1266 // in general, if the "average" event weight is 1, the learning rate
1267 // should be good if set around 0.02 (a good value if all event weights are 1)
1268
1269 const Event * ev = GetEvent(ievt);
1270 Double_t eventWeight = ev->GetWeight();
1271 ForceNetworkInputs( ev );
1272 ForceNetworkCalculations();
1273 if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1274 if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1275 else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1276}
1277
1278////////////////////////////////////////////////////////////////////////////////
1279/// get the desired output of this event
1280
1281Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
1282{
1283 return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1284}
1285
1286////////////////////////////////////////////////////////////////////////////////
1287/// update the network based on how closely
1288/// the output matched the desired output
1289
1290void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
1291{
1292 Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1293 if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1294 else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1295 else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1296 error *= eventWeight;
1297 GetOutputNeuron()->SetError(error);
1298 CalculateNeuronDeltas();
1299 UpdateSynapses();
1300}
1301
1302////////////////////////////////////////////////////////////////////////////////
1303/// update the network based on how closely
1304/// the output matched the desired output
1305
1306void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1307{
1308 // Norm for softmax
1309 Double_t norm = 0.;
1310 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1311 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1312 norm += TMath::Exp(act);
1313 }
1314
1315 // Get output of network, and apply softmax
1316 for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1317 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1318 Double_t output = TMath::Exp(act) / norm;
1319 Double_t error = output - desired.at(i);
1320 error *= eventWeight;
1321 GetOutputNeuron(i)->SetError(error);
1322 }
1323
1324 // Do backpropagation
1325 CalculateNeuronDeltas();
1326 UpdateSynapses();
1327}
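// Orientation note: with softmax outputs p_i = exp(a_i) / sum_j exp(a_j) and a cross-entropy
// loss E = -sum_i t_i * ln(p_i), the derivative with respect to the activation a_i is
// dE/da_i = p_i - t_i. This is why the error assigned to each output neuron above is simply
// (output - desired), scaled by the event weight, before back-propagation of the deltas.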
1328
1329////////////////////////////////////////////////////////////////////////////////
1330/// have each neuron calculate its delta by back propagation
1331
1332void TMVA::MethodMLP::CalculateNeuronDeltas()
1333{
1334 TNeuron* neuron;
1335 Int_t numNeurons;
1336 Int_t numLayers = fNetwork->GetEntriesFast();
1337 TObjArray* curLayer;
1338
1339 // step backwards through the network (back propagation)
1340 // deltas calculated starting at output layer
1341 for (Int_t i = numLayers-1; i >= 0; i--) {
1342 curLayer = (TObjArray*)fNetwork->At(i);
1343 numNeurons = curLayer->GetEntriesFast();
1344
1345 for (Int_t j = 0; j < numNeurons; j++) {
1346 neuron = (TNeuron*) curLayer->At(j);
1347 neuron->CalculateDelta();
1348 }
1349 }
1350}
1351
1352////////////////////////////////////////////////////////////////////////////////
1353/// create genetics class similar to GeneticCut
1354/// give it vector of parameter ranges (parameters = weights)
1355/// link fitness function of this class to ComputeEstimator
1356/// instantiate GA (see MethodCuts)
1357/// run it
1358/// then this should exist for GA, Minuit and random sampling
1359
1360void TMVA::MethodMLP::GeneticMinimize()
1361{
1362 PrintMessage("Minimizing Estimator with GA");
1363
1364 // define GA parameters
1365 fGA_preCalc = 1;
1366 fGA_SC_steps = 10;
1367 fGA_SC_rate = 5;
1368 fGA_SC_factor = 0.95;
1369 fGA_nsteps = 30;
1370
1371 // ranges
1372 std::vector<Interval*> ranges;
1373
1374 Int_t numWeights = fSynapses->GetEntriesFast();
1375 for (Int_t ivar=0; ivar< numWeights; ivar++) {
1376 ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1377 }
1378
1379 FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1380 gf->Run();
1381
1382 Double_t estimator = CalculateEstimator();
1383 Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1384}
1385
1386////////////////////////////////////////////////////////////////////////////////
1387/// interface to the estimate
1388
1389Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1390{
1391 return ComputeEstimator( parameters );
1392}
1393
1394////////////////////////////////////////////////////////////////////////////////
1395/// this function is called by GeneticANN for GA optimization
1396
1397Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1398{
1399 TSynapse* synapse;
1400 Int_t numSynapses = fSynapses->GetEntriesFast();
1401
1402 for (Int_t i = 0; i < numSynapses; i++) {
1403 synapse = (TSynapse*)fSynapses->At(i);
1404 synapse->SetWeight(parameters.at(i));
1405 }
1406 if (fUseRegulator) UpdatePriors(); //zjh
1407
1408 Double_t estimator = CalculateEstimator();
1409
1410 return estimator;
1411}
1412
1413////////////////////////////////////////////////////////////////////////////////
1414/// update synapse error fields and adjust the weights (if in sequential mode)
1415
1416void TMVA::MethodMLP::UpdateSynapses()
1417{
1418 TNeuron* neuron;
1419 Int_t numNeurons;
1420 TObjArray* curLayer;
1421 Int_t numLayers = fNetwork->GetEntriesFast();
1422
1423 for (Int_t i = 0; i < numLayers; i++) {
1424 curLayer = (TObjArray*)fNetwork->At(i);
1425 numNeurons = curLayer->GetEntriesFast();
1426
1427 for (Int_t j = 0; j < numNeurons; j++) {
1428 neuron = (TNeuron*) curLayer->At(j);
1429 if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1430 else neuron->UpdateSynapsesSequential();
1431 }
1432 }
1433}
1434
1435////////////////////////////////////////////////////////////////////////////////
1436/// just adjust the synapse weights (should be called in batch mode)
1437
1438void TMVA::MethodMLP::AdjustSynapseWeights()
1439{
1440 TNeuron* neuron;
1441 Int_t numNeurons;
1442 TObjArray* curLayer;
1443 Int_t numLayers = fNetwork->GetEntriesFast();
1444
1445 for (Int_t i = numLayers-1; i >= 0; i--) {
1446 curLayer = (TObjArray*)fNetwork->At(i);
1447 numNeurons = curLayer->GetEntriesFast();
1448
1449 for (Int_t j = 0; j < numNeurons; j++) {
1450 neuron = (TNeuron*) curLayer->At(j);
1451 neuron->AdjustSynapseWeights();
1452 }
1453 }
1454}
1455
1456////////////////////////////////////////////////////////////////////////////////
1457
1458void TMVA::MethodMLP::UpdatePriors()
1459{
1460 fPrior=0;
1461 fPriorDev.clear();
1462 Int_t nSynapses = fSynapses->GetEntriesFast();
1463 for (Int_t i=0;i<nSynapses;i++) {
1464 TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1465 fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1466 fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1467 }
1468}
1469
1470////////////////////////////////////////////////////////////////////////////////
1471
1472void TMVA::MethodMLP::UpdateRegulators()
1473{
1474 TMatrixD InvH(0,0);
1475 GetApproxInvHessian(InvH);
1476 Int_t numSynapses=fSynapses->GetEntriesFast();
1477 Int_t numRegulators=fRegulators.size();
1478 Float_t gamma=0,
1479 variance=1.; // Gaussian noise
1480 std::vector<Int_t> nWDP(numRegulators);
1481 std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1482 for (int i=0;i<numSynapses;i++) {
1483 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1484 Int_t idx=fRegulatorIdx[i];
1485 nWDP[idx]++;
1486 trace[idx]+=InvH[i][i];
1487 gamma+=1-fRegulators[idx]*InvH[i][i];
1488 weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1489 }
1490 if (fEstimator==kMSE) {
1491 if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1492 else variance=CalculateEstimator( Types::kTraining, 0 );
1493 }
1494
1495 //Log() << kDEBUG << Endl;
1496 for (int i=0;i<numRegulators;i++)
1497 {
1498 //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1499 fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1500 if (fRegulators[i]<0) fRegulators[i]=0;
1501 Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1502 }
1503 float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1504 float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1505
1506 Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1507
1508}
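// Orientation note: this resembles an evidence-approximation (MacKay-style) update of the
// regularisation strengths. From the code above, per regulator class i,
//
//    gamma    = sum_w ( 1 - alpha_w * (H^-1)_ww )             (well-determined parameters)
//    sigma^2  = trainE / ( 1 - gamma/N )                      (noise estimate, kMSE only)
//    alpha_i <- sigma^2 * n_i / ( sum_{w in i} w^2 + sigma^2 * trace_i(H^-1) )
//
// where n_i counts the synapses assigned to regulator i and trace_i sums their diagonal
// inverse-Hessian elements.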
1509
1510////////////////////////////////////////////////////////////////////////////////
1511
1512void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1513{
1514 Int_t numSynapses=fSynapses->GetEntriesFast();
1515 InvHessian.ResizeTo( numSynapses, numSynapses );
1516 InvHessian=0;
1517 TMatrixD sens(numSynapses,1);
1518 TMatrixD sensT(1,numSynapses);
1519 Int_t nEvents = GetNEvents();
1520 for (Int_t i=0;i<nEvents;i++) {
1521 GetEvent(i);
1522 double outputValue=GetMvaValue(); // force calculation
1523 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1524 CalculateNeuronDeltas();
1525 for (Int_t j = 0; j < numSynapses; j++){
1526 TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1527 synapses->InitDelta();
1528 synapses->CalculateDelta();
1529 sens[j][0]=sensT[0][j]=synapses->GetDelta();
1530 }
1531 if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1532 else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1533 }
1534
1535 // TVectorD eValue(numSynapses);
1536 if (regulate) {
1537 for (Int_t i = 0; i < numSynapses; i++){
1538 InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1539 }
1540 }
1541 else {
1542 for (Int_t i = 0; i < numSynapses; i++){
1543 InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1544 }
1545 }
1546
1547 InvHessian.Invert();
1548
1549}
1550
1551////////////////////////////////////////////////////////////////////////////////
1552
1553Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
1554{
1555 Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1556
1557 // no hessian (old training file) or no error requested
1558 if (!fCalculateErrors || errLower==0 || errUpper==0)
1559 return MvaValue;
1560
1561 Double_t MvaUpper,MvaLower,median,variance;
1562 Int_t numSynapses=fSynapses->GetEntriesFast();
1563 if (fInvHessian.GetNcols()!=numSynapses) {
1564 Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1565 }
1566 TMatrixD sens(numSynapses,1);
1567 TMatrixD sensT(1,numSynapses);
1568 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1569 //GetOutputNeuron()->SetError(1.);
1570 CalculateNeuronDeltas();
1571 for (Int_t i = 0; i < numSynapses; i++){
1572 TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1573 synapses->InitDelta();
1574 synapses->CalculateDelta();
1575 sensT[0][i]=synapses->GetDelta();
1576 }
1577 sens.Transpose(sensT);
1578 TMatrixD sig=sensT*fInvHessian*sens;
1579 variance=sig[0][0];
1580 median=GetOutputNeuron()->GetValue();
1581
1582 if (variance<0) {
1583 Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1584 variance=0;
1585 }
1586 variance=sqrt(variance);
1587
1588 //upper
1589 MvaUpper=fOutput->Eval(median+variance);
1590 if(errUpper)
1591 *errUpper=MvaUpper-MvaValue;
1592
1593 //lower
1594 MvaLower=fOutput->Eval(median-variance);
1595 if(errLower)
1596 *errLower=MvaValue-MvaLower;
1597
1598 return MvaValue;
1599}
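// Orientation note: the uncertainty computed above is linear error propagation through the
// network. With s the vector of derivatives of the (pre-transfer-function) output with
// respect to each synapse weight and H^-1 the approximate inverse Hessian stored at the end
// of training,
//
//    sigma^2 = s^T * H^-1 * s
//
// and the upper/lower errors are fOutput->Eval(median +/- sigma) minus the central MVA value.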
1600
1601
1602#ifdef MethodMLP_UseMinuit__
1603
1604////////////////////////////////////////////////////////////////////////////////
1605/// minimize using Minuit
1606
1607void TMVA::MethodMLP::MinuitMinimize()
1608{
1609 fNumberOfWeights = fSynapses->GetEntriesFast();
1610
1611 TFitter* tfitter = new TFitter( fNumberOfWeights );
1612
1613 // minuit-specific settings
1614 Double_t args[10];
1615
1616 // output level
1617 args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1618 tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1619 tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1620
1621 double w[54];
1622
1623 // init parameters
1624 for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1625 TString parName = Form("w%i", ipar);
1626 tfitter->SetParameter( ipar,
1627 parName, w[ipar], 0.1, 0, 0 );
1628 }
1629
1630 // define the CFN function
1631 tfitter->SetFCN( &IFCN );
1632
1633 // define fit strategy
1634 args[0] = 2;
1635 tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1636
1637 // now do the fit !
1638 args[0] = 1e-04;
1639 tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1640
1641 Bool_t doBetter = kFALSE;
1642 Bool_t doEvenBetter = kFALSE;
1643 if (doBetter) {
1644 args[0] = 1e-04;
1645 tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1646
1647 if (doEvenBetter) {
1648 args[0] = 500;
1649 tfitter->ExecuteCommand( "MINOS", args, 1 );
1650 }
1651 }
1652}
1653
1654////////////////////////////////////////////////////////////////////////////////
1655/// Evaluate the minimisation function
1656///
1657/// Input parameters:
1658/// - npars: number of currently variable parameters
1659/// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1660/// - fitPars: array of (constant and variable) parameters
1661/// - iflag: indicates what is to be calculated (see example below)
1662/// - grad: array of gradients
1663///
1664/// Output parameters:
1665/// - f: the calculated function value.
1666/// - grad: the (optional) vector of first derivatives).
1667
1668void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1669{
1670 ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1671}
1672
1673TTHREAD_TLS(Int_t) nc = 0;
1674TTHREAD_TLS(double) minf = 1000000;
1675
1676void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1677{
1678 // first update the weights
1679 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1680 TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1681 synapse->SetWeight(fitPars[ipar]);
1682 }
1683
1684 // now compute the estimator
1685 f = CalculateEstimator();
1686
1687 nc++;
1688 if (f < minf) minf = f;
1689 for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1690 Log() << kDEBUG << Endl;
1691 Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1692}
1693
1694////////////////////////////////////////////////////////////////////////////////
1695/// global "this" pointer to be used in minuit
1696
1697TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1698{
1699 return fgThis;
1700}
1701
1702#endif
1703
1704
1705////////////////////////////////////////////////////////////////////////////////
1706/// write specific classifier response
1707
1708void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1709{
1710 MethodANNBase::MakeClassSpecific(fout, className);
1711}
1712
1713////////////////////////////////////////////////////////////////////////////////
1714/// get help message text
1715///
1716/// typical length of text line:
1717/// "|--------------------------------------------------------------|"
1718
1719void TMVA::MethodMLP::GetHelpMessage() const
1720{
1721 TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1722 TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1723
1724 Log() << Endl;
1725 Log() << col << "--- Short description:" << colres << Endl;
1726 Log() << Endl;
1727 Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1728 Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1729 Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1730 Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1731 Log() << "signal and one background). " << Endl;
1732 Log() << Endl;
1733 Log() << col << "--- Performance optimisation:" << colres << Endl;
1734 Log() << Endl;
1735 Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1736 Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1737 Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1738 Log() << "number of input variables that have only little discrimination power. " << Endl;
1739 Log() << "" << Endl;
1740 Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1741 Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1742 Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1743 Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1744 Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1745 Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1746 Log() << "competitive results)." << Endl;
1747 Log() << Endl;
1748 Log() << col << "Overtraining: " << colres
1749 << "only the TMlpANN performs an explicit separation of the" << Endl;
1750 Log() << "full training sample into independent training and validation samples." << Endl;
1751 Log() << "We have found that in most high-energy physics applications the " << Endl;
1752 Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1753 Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1754 Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1755 Log() << "the use of validation samples would only reduce the available training" << Endl;
1756 Log() << "information. However, if the performance on the training sample is " << Endl;
1757 Log() << "found to be significantly better than the one found with the inde-" << Endl;
1758 Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1759 Log() << "are printed to standard output at the end of each training job." << Endl;
1760 Log() << Endl;
1761 Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1762 Log() << Endl;
1763 Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1764 Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
1765 Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1766 Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1767 Log() << "in favour of more neurons in the first hidden layer." << Endl;
1768 Log() << "" << Endl;
1769 Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1770 Log() << "adjustable weights is small compared to the training sample size," << Endl;
1771 Log() << "using a large number of training samples should not lead to overtraining." << Endl;
1772}
1773
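As an editorial illustration of the options discussed in the help text above (HiddenLayers, NCycles), the sketch below books this method through the TMVA Factory; it assumes an already configured TMVA::Factory and TMVA::DataLoader, and the option values are examples rather than recommendations from this file:

// Editorial example -- booking the MLP method with an explicit option string.
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

void BookExampleMLP(TMVA::Factory* factory, TMVA::DataLoader* dataloader)
{
   // first hidden layer: N+5 neurons, second layer: N neurons,
   // where N is the number of input variables
   factory->BookMethod(dataloader, TMVA::Types::kMLP, "MLP",
                       "!H:!V:NeuronType=tanh:VarTransform=N:"
                       "NCycles=600:HiddenLayers=N+5,N:TestRate=5");
}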