#ifdef MethodMLP_UseMinuit__
// ...
TMVA::MethodMLP::MethodMLP( const TString& jobName,
                            const TString& methodTitle,
                            DataSetInfo& theData,
                            const TString& theOption,
                            TDirectory* theTargetDir )
   : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption, theTargetDir ),
     fUseRegulator(false), fCalculateErrors(false),
     fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
     fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
     fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
     fSamplingTraining(false), fSamplingTesting(false),
     fLastAlpha(0.0), fTau(0.),
     fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
     fBPMode(kSequential), fBpModeS("None"),
     fBatchSize(0), fTestRate(0), fEpochMon(false),
     fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
     fGA_SC_rate(0), fGA_SC_factor(0.0),
     fDeviationsFromTargets(0),
     // ...

// the second constructor initializes the same members
// ...
     fUseRegulator(false), fCalculateErrors(false),
     fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
     fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
     fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
     fSamplingTraining(false), fSamplingTesting(false),
     fLastAlpha(0.0), fTau(0.),
     fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
     fBPMode(kSequential), fBpModeS("None"),
     fBatchSize(0), fTestRate(0), fEpochMon(false),
     fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
     fGA_SC_rate(0), fGA_SC_factor(0.0),
     fDeviationsFromTargets(0),
     // ...

// Init(): default initializations
   SetSignalReferenceCut( 0.5 );

#ifdef MethodMLP_UseMinuit__
   // ...

// DeclareOptions: define the configuration options of the MLP
   DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
                    "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");

   DeclareOptionRef(fLearnRate=0.02,    "LearningRate", "ANN learning rate parameter");
   DeclareOptionRef(fDecayRate=0.01,    "DecayRate",    "Decay rate for learning parameter");
   DeclareOptionRef(fTestRate =10,      "TestRate",     "Test for overtraining performed at each #th epochs");
   DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring",
                    "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );

   DeclareOptionRef(fSamplingFraction=1.0, "Sampling",
                    "Only 'Sampling' (randomly selected) events are trained each epoch");
   DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch",
                    "Sampling is used for the first 'SamplingEpoch' epochs; afterwards, all events are taken for training");
   DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance",
                    "The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");

   DeclareOptionRef(fSamplingTraining=kTRUE,  "SamplingTraining", "The training sample is sampled");
   DeclareOptionRef(fSamplingTesting =kFALSE, "SamplingTesting",  "The testing sample is sampled");

   DeclareOptionRef(fResetStep=50,  "ResetStep", "How often BFGS should reset history");
   DeclareOptionRef(fTau      =3.0, "Tau",       "LineSearch \"size step\"");

   DeclareOptionRef(fBpModeS="sequential", "BPMode",
                    "Back-propagation learning mode: sequential or batch");
   AddPreDefVal(TString("sequential"));
   AddPreDefVal(TString("batch"));

   DeclareOptionRef(fBatchSize=-1, "BatchSize",
                    "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");

   DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
                    "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fSteps=-1, "ConvergenceTests",
                    "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
                    "Use regulator to avoid over-training");
   DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
                    "Maximum times of regulator update");
   DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
                    "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");

   DeclareOptionRef(fWeightRange=1.0, "WeightRange",
                    "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");

// ProcessOptions: map the option strings onto the internal enums
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO
            << "Will ignore negative events in training!"
            << Endl;
   }

   if      (fTrainMethodS == "BP"  ) fTrainingMethod = kBP;
   else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
   else if (fTrainMethodS == "GA"  ) fTrainingMethod = kGA;

   if      (fBpModeS == "sequential") fBPMode = kSequential;
   else if (fBpModeS == "batch")      fBPMode = kBatch;
   // ...
   if (fBPMode == kBatch) {
      // ...
      if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
   }

// InitializeLearningRates: set the initial learning rate of every synapse
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      // ...
   }

// CalculateEstimator: compute the estimator that the training tries to minimize
      Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
   // ...
   Data()->SetCurrentType(treeType);
   // ...
   if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
      histS = new TH1F( nameS, nameS, nbin, -limit, limit );
      histB = new TH1F( nameB, nameB, nbin, -limit, limit );
   }
   // ...
   UInt_t nClasses = DataInfo().GetNClasses();
   UInt_t nTgts    = DataInfo().GetNTargets();
   // ...
   if (fWeightRange < 1.f) {
      fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
   }

   // ... event loop ...
      const Event* ev = GetEvent(i);

      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
          // ...

      ForceNetworkInputs( ev );
      ForceNetworkCalculations();
      // ...
      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
            v = GetOutputNeuron( itgt )->GetActivationValue();
            // ...
         }
      }
      else if (DoMulticlass()) {
         // ...
         if (fEstimator==kCE) {
            // ...
            for (UInt_t icls = 0; icls < nClasses; icls++) {
               Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
               norm += exp( activationValue );
               // ...
               d = exp( activationValue );
            }
            // ...
         }
         else {
            for (UInt_t icls = 0; icls < nClasses; icls++) {
               Double_t desired = (icls==cls) ? 1.0 : 0.0;
               v = GetOutputNeuron( icls )->GetActivationValue();
               d = (desired-v)*(desired-v);
            }
         }
      }
      else {
         Double_t desired = DataInfo().IsSignal(ev) ? 1. : 0.;
         v = GetOutputNeuron()->GetActivationValue();
         if (fEstimator==kMSE) d = (desired-v)*(desired-v);
         // ...
      }
      // ...
      if (fDeviationsFromTargets)
         fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
      // ...
      if      (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
      else if (histB != 0)                            histB->Fill( float(v), float(w) );
   // ...
   if (fDeviationsFromTargets) {
      std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
      // ...
      Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
      // ...
      Float_t weightRangeCut = fWeightRange*sumOfWeights;
      // ...
      for (std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(),
              itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev) {
         float deviation = (*itDev).first;
         float devWeight = (*itDev).second;
         weightSum += devWeight;
         if (weightSum <= weightRangeCut) {
            estimator += devWeight*deviation;
         }
      }
      // ...
      sumOfWeights = sumOfWeightsInRange;
      delete fDeviationsFromTargets;
   }
   // ...
   if (histS != 0) fEpochMonHistS.push_back( histS );
   if (histB != 0) fEpochMonHistB.push_back( histB );
   // ...
   if      (DoRegression()) estimator = estimator/Float_t(sumOfWeights);
   else if (DoMulticlass()) estimator = estimator/Float_t(sumOfWeights);
   else                     estimator = estimator/Float_t(sumOfWeights);
   // ...
   Data()->SetCurrentType( saveType );
   // ...
   if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
      CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch),
                                   &fEpochMonHistW );
   }

// Train: initialize the network, run the chosen minimizer, and prepare the
// regulator terms / inverse Hessian if required
      Log() << kFATAL << "ANN Network is not initialized, doing it now!" << Endl;
      SetAnalysisType(GetAnalysisType());
   // ...
   InitializeLearningRates();
   PrintMessage("Training Network");
   // ...
   Int_t nSynapses=fSynapses->GetEntriesFast();
   if (nSynapses>nEvents)
      Log() << kWARNING << "ANN too complicated: #events=" << nEvents
            << "\t#synapses=" << nSynapses << Endl;

#ifdef MethodMLP_UseMinuit__
   if (useMinuit) MinuitMinimize();
   // ...
   if      (fTrainingMethod == kGA)   GeneticMinimize();
   else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
   else                               BackPropagationMinimize(nEpochs);
   // ...
      Log() << kINFO << "Finalizing handling of Regulator terms, trainE="
            << trainE << " testE=" << testE << Endl;
      // ...
      Log() << kINFO << "Done with handling of Regulator terms" << Endl;
   // ...
   if (fCalculateErrors || fUseRegulator) {
      // ...
      Int_t numSynapses=fSynapses->GetEntriesFast();
      fInvHessian.ResizeTo(numSynapses,numSynapses);
      GetApproxInvHessian( fInvHessian, false );
   }

// BFGSMinimize: train the network with the BFGS algorithm
   Timer timer( (fSteps>0?100:nEpochs), GetName() );
   // ...
   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   // ...
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;
   // ...
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
   }
   // ...
   std::vector<Double_t> buffer( nWeights );
   for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
   // ...
   TMatrixD Hessian ( nWeights, nWeights );
   // ...
   Int_t RegUpdateTimes=0;
   // ...
   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0,1.0,fRandomSeed);
   // ...
   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   // ...
   for (Int_t i = 0; i < nEpochs; i++) {
      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               // ...
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               // ...
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         // ...
         Data()->InitSampling(1.0,1.0);
         // ...
         Data()->InitSampling(1.0,1.0);
      }
      // ...
      SetGammaDelta( Gamma, Delta, buffer );
      // ...
      if (i % fResetStep == 0 && i<0.5*nEpochs) {
         // ...
      }
      // ...
      if (GetHessian( Hessian, Gamma, Delta )) {
         // ...
      }
      else SetDir( Hessian, Dir );
      // ...
      if (DerivDir( Dir ) > 0) {
         // ...
      }
      if (LineSearch( Dir, buffer, &dError )) {
         // ...
         if (LineSearch(Dir, buffer, &dError)) {
            // ...
            Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
         }
      }
      // ...
      if (fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
         Log() << kDEBUG << "\n\nUpdate regulators " << RegUpdateTimes << " on epoch " << i
               << "\tdError=" << dError << Endl;
         // ...
      }
      // ...
      if ((i+1)%fTestRate == 0) {
         // ...
         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );
         // ...
         if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
            // ...
         }
         Data()->EventResult( success );
         // ...
         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               // ...
               ResetConvergenceCounter();
            }
            // ...
         }
      }
      // ...
      TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE, i );
      // ...
      if (Float_t(i)/nEpochs < fSamplingEpoch)
         progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
      // ...
         progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
      // ...
      Float_t progress2 = 100.0*RegUpdateTimes/fUpdateLimit;
      if (progress2>progress) progress=progress2;
      // ...
      if (progress<i) progress=i;
      // ...
   }

// SetGammaDelta: Gamma = change of the gradient, Delta = change of the weights
// over the last step (Delta is taken from the line-search buffer)
   Int_t nWeights = fSynapses->GetEntriesFast();
   // ...
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      Gamma[IDX++][0] = -synapse->GetDEDw();
   }
   for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
   // ...
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      Gamma[IDX++][0] += synapse->GetDEDw();
   }

// compute dE/dw for every synapse, averaged over all (positive-weight) events
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
   }

   // ... event loop ...
      const Event* ev = GetEvent(i);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
          // ...
      // ...
      for (Int_t j=0;j<nSynapses;j++) {
         // ...
      }
   // ...
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      if (fUseRegulator) DEDw+=fPriorDev[i];
      synapse->SetDEDw( DEDw / nPosEvents );
   }

// SimulateEvent: propagate one event through the network and set the
// output-neuron errors used to accumulate dE/dw
   ForceNetworkInputs( ev );
   ForceNetworkCalculations();
   // ...
   if (DoRegression()) {
      UInt_t ntgt = DataInfo().GetNTargets();
      for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
         // ...
         Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( itgt )->SetError(error);
      }
   }
   else if (DoMulticlass()) {
      UInt_t nClasses = DataInfo().GetNClasses();
      // ...
      for (UInt_t icls = 0; icls < nClasses; icls++) {
         Double_t desired = ( cls==icls ? 1.0 : 0.0 );
         Double_t error   = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( icls )->SetError(error);
      }
   }
   else {
      Double_t desired = GetDesiredOutput( ev );
      // ...
      if      (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;
      else if (fEstimator==kCE)  error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired);
      GetOutputNeuron()->SetError(error);
   }
   // ...
   CalculateNeuronDeltas();
   for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
      // ...
   }

// SteepestDir: steepest-descent direction, Dir = -dE/dw
   Int_t nSynapses = fSynapses->GetEntriesFast();
   // ...
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      Dir[IDX++][0] = -synapse->GetDEDw();
   }

// SetDir: quasi-Newton direction, Dir = -H * dE/dw
   Int_t nSynapses = fSynapses->GetEntriesFast();
   // ...
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      DEDw[IDX++][0] = synapse->GetDEDw();
   }
   // ...
   dir = Hessian * DEDw;
   for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];

// DerivDir: directional derivative of the error along Dir
   Int_t nSynapses = fSynapses->GetEntriesFast();
   // ...
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      Result += Dir[IDX++][0] * synapse->GetDEDw();
   }
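
SetGammaDelta(), SetDir() and SteepestDir() provide the ingredients of a quasi-Newton step: with s_k the last weight change (Delta, taken from the line-search buffer), y_k the corresponding gradient change (Gamma) and H_k the approximate inverse Hessian,

   d_k = -H_k \nabla E_k \quad (\texttt{SetDir}), \qquad d_k = -\nabla E_k \quad (\texttt{SteepestDir}),

and the textbook BFGS update of the inverse Hessian reads

   H_{k+1} = \bigl(I - \rho_k\, s_k y_k^{\top}\bigr)\, H_k \,\bigl(I - \rho_k\, y_k s_k^{\top}\bigr) + \rho_k\, s_k s_k^{\top}, \qquad \rho_k = \frac{1}{y_k^{\top} s_k}.

GetHessian() is not shown in these fragments, so take the update above as the standard form rather than a transcription of the code.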

// LineSearch: bracket the minimum along Dir, fit a parabola, and move there;
// returns kTRUE on failure
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;
   // ...
   std::vector<Double_t> Origin(nWeights);
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
   }
   // ...
   if      (alpha2 < 0.01) alpha2 = 0.01;
   else if (alpha2 > 2.0)  alpha2 = 2.0;
   // ...
   SetDirWeights( Origin, Dir, alpha2 );
   // ...
   for (Int_t i=0;i<100;i++) {
      // ...
      SetDirWeights(Origin, Dir, alpha3);
      // ...
   }
   // ...
      SetDirWeights(Origin, Dir, 0.);
      // ...
      for (Int_t i=0;i<100;i++) {
         // ...
      }
      // ...
      Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
      alpha2 = -alpha_original;
      // ...
      SetDirWeights(Origin, Dir, alpha2);
      // ...
      SetDirWeights(Origin, Dir, 0.);
      Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
   // ...
   if (alpha1>0 && alpha2>0 && alpha3 > 0) {
      fLastAlpha = 0.5 * (alpha1 + alpha3 -
                          (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
                                           - ( err2 - err1 ) / (alpha2 - alpha1 )));
   }
   // ...
   fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
   // ...
   SetDirWeights(Origin, Dir, fLastAlpha);
   // ...
   if (finalError > err1) {
      Log() << kWARNING << "Line search increased error! Something is wrong."
            << " fLastAlpha=" << fLastAlpha << " al123=" << alpha1 << " "
            << alpha2 << " " << alpha3 << " err1=" << err1
            << " errfinal=" << finalError << Endl;
   }
   // ...
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      buffer[IDX] = synapse->GetWeight() - Origin[IDX];
      // ...
   }
   // ...
   if (dError) (*dError)=(errOrigin-finalError)/finalError;
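
The fLastAlpha assignment above is the minimum of the parabola through the three probed points (alpha1, err1), (alpha2, err2) and (alpha3, err3):

   \alpha_{\min} \;=\; \frac{1}{2}\left[\,\alpha_1 + \alpha_3 \;-\; \frac{E_3 - E_1}{\dfrac{E_3 - E_2}{\alpha_3 - \alpha_2} - \dfrac{E_2 - E_1}{\alpha_2 - \alpha_1}}\,\right],

which is then capped at 10000 before the weights are moved by SetDirWeights(Origin, Dir, fLastAlpha).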

// SetDirWeights: move all weights to Origin + alpha * Dir
   Int_t nSynapses = fSynapses->GetEntriesFast();
   // ...
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
      // ...
   }
   // ...
   if (fUseRegulator) UpdatePriors();

// sum the per-event error (MSE or cross-entropy) over all events,
// adding the regulator prior if enabled
   UInt_t ntgts = GetNTargets();

   // ... event loop ...
      const Event* ev = GetEvent(i);
      // ...
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
          // ...
      // ...
      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
            error += GetMSEErr( ev, itgt );
         }
      }
      else if (DoMulticlass()) {
         for (UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++) {
            error += GetMSEErr( ev, icls );
         }
      }
      // ...
      if      (fEstimator==kMSE) error = GetMSEErr( ev );
      else if (fEstimator==kCE)  error = GetCEErr( ev );
      // ...
   if (fUseRegulator) Result+=fPrior;
   if (Result<0) Log() << kWARNING << "\nNegative Error!!! :" << Result-fPrior << "+" << fPrior << Endl;

// GetMSEErr: mean-squared-error contribution of one event
   if      (DoRegression()) target = ev->GetTarget( index );
   else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
   else                     target = GetDesiredOutput( ev );
   // ...
   error = 0.5*(output-target)*(output-target);

// GetCEErr: cross-entropy contribution of one event (same target selection)
   if      (DoRegression()) target = ev->GetTarget( index );
   else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
   else                     target = GetDesiredOutput( ev );
   // ...
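
Both error functions pick the target the same way and differ only in the loss. The MSE expression is shown above; the cross-entropy body is elided, so the second form below is the conventional binary cross-entropy rather than a copy of the code (o = output activation, t = target):

   E_{\mathrm{MSE}} = \tfrac{1}{2}\,(o - t)^2, \qquad E_{\mathrm{CE}} = -\,t\ln o \;-\; (1 - t)\ln(1 - o).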

// BackPropagationMinimize: train the network with the back-propagation algorithm
   Timer timer( (fSteps>0?100:nEpochs), GetName() );
   // ...
   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   // ...
   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0,1.0,fRandomSeed);
   // ...
   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   // ...
   for (Int_t i = 0; i < nEpochs; i++) {
      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               // ...
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               // ...
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         // ...
         Data()->InitSampling(1.0,1.0);
         // ...
         Data()->InitSampling(1.0,1.0);
      }
      // ...
      DecaySynapseWeights(i >= lateEpoch);
      // ...
      if ((i+1)%fTestRate == 0) {
         // ...
         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );
         // ...
         if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
            // ...
         }
         Data()->EventResult( success );
         // ...
         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               // ...
               ResetConvergenceCounter();
            }
            // ...
            if (lateEpoch > i) lateEpoch = i;
            // ...
         }
      }
      // ...
      TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
      // ...
      if (Float_t(i)/nEpochs < fSamplingEpoch)
         progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
      // ...
         progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
      // ...
   }

// TrainOneEpoch: train the network over a single epoch (one pass over the events)
   Shuffle(index, nEvents);

   // ... event loop ...
      const Event * ev = GetEvent(index[i]);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
          // ...
      // ...
      TrainOneEvent(index[i]);
      // ...
      if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
         AdjustSynapseWeights();
         if (fgPRINT_BATCH) {
            // ...
         }
      }

// Shuffle: randomly permute the event indices so that each epoch sees the
// events in a different order
   for (Int_t i = 0; i < n; i++) {
      j = (Int_t) (frgen->Rndm() * a);
      // ...
      index[j] = index[i];
      // ...
   }

// DecaySynapseWeights: lower the learning rate of every synapse
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      // ...
   }

// TrainOneEventFast: fast per-event training using a flat branch buffer
   if (type == 0) desired = fOutput->GetMin();  // background
   else           desired = fOutput->GetMax();  // signal
   // ...
   for (UInt_t j = 0; j < GetNvar(); j++) {
      // ...
      neuron = GetInputNeuron(j);
      // ...
   }
   // ...
   ForceNetworkCalculations();
   UpdateNetwork(desired, eventWeight);

// TrainOneEvent: train the network on a single event
   const Event * ev = GetEvent(ievt);
   // ...
   ForceNetworkInputs( ev );
   ForceNetworkCalculations();
   if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
   if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
   else                UpdateNetwork( GetDesiredOutput( ev ), eventWeight );

// GetDesiredOutput: desired output value of a classification event
   return DataInfo().IsSignal(ev) ? fOutput->GetMax() : fOutput->GetMin();

// UpdateNetwork: set the output-neuron error from how closely the output
// matched the desired value, then back-propagate
   Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
   if      (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired;
   else if (fEstimator==kCE)  error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired);
   else    Log() << kFATAL << "Estimator type unspecified!!" << Endl;
   error *= eventWeight;
   GetOutputNeuron()->SetError(error);
   CalculateNeuronDeltas();
   // ...

// UpdateNetwork (vector of targets, used for regression and multiclass)
   for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
      Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i);
      error *= eventWeight;
      GetOutputNeuron( i )->SetError(error);
   }
   CalculateNeuronDeltas();
   // ...
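
The quantity stored with SetError() is the derivative of the per-event loss with respect to the output activation o, which is exactly what the subsequent back-propagation of neuron deltas needs. For the two estimators, with t = desired restricted to {0,1} for classification,

   \frac{\partial E_{\mathrm{MSE}}}{\partial o} = o - t, \qquad \frac{\partial E_{\mathrm{CE}}}{\partial o} = -\frac{t}{o} + \frac{1 - t}{1 - o} = -\frac{1}{o - 1 + t} \quad (t \in \{0,1\}),

each scaled by the event weight, matching the two assignments in the scalar UpdateNetwork() above.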

// CalculateNeuronDeltas: back-propagate the deltas layer by layer
   Int_t numLayers = fNetwork->GetEntriesFast();
   // ...
   for (Int_t i = numLayers-1; i >= 0; i--) {
      // ...
      for (Int_t j = 0; j < numNeurons; j++) {
         // ...
      }
   }

// GeneticMinimize: treat the synapse weights as parameters of a genetic algorithm
   PrintMessage("Minimizing Estimator with GA");
   // ...
   fGA_SC_factor = 0.95;
   // ...
   std::vector<Interval*> ranges;
   // ...
   Int_t numWeights = fSynapses->GetEntriesFast();
   for (Int_t ivar=0; ivar< numWeights; ivar++) {
      ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
   }
   // ...
   Double_t estimator = CalculateEstimator();
   Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;

// EstimatorFunction: fitter interface, forwards to ComputeEstimator
   return ComputeEstimator( parameters );

// ComputeEstimator: load the parameter vector into the synapse weights and
// evaluate the estimator (called by the GA)
   Int_t numSynapses = fSynapses->GetEntriesFast();
   // ...
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      // ...
   }
   if (fUseRegulator) UpdatePriors();
   // ...
   Double_t estimator = CalculateEstimator();

// forward pass over all layers and neurons (update of the pre-synapses)
   for (Int_t i = 0; i < numLayers; i++) {
      // ...
      for (Int_t j = 0; j < numNeurons; j++) {
         // ...
      }
   }

// backward pass over all layers and neurons (adjustment of the synapse weights)
   for (Int_t i = numLayers-1; i >= 0; i--) {
      // ...
      for (Int_t j = 0; j < numNeurons; j++) {
         // ...
      }
   }

// UpdatePriors: accumulate the regulator (weight-decay) prior and its gradient
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      // ...
      fPrior += 0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
      fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
   }
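
Written out, UpdatePriors() accumulates a quadratic (weight-decay) penalty and its gradient, with one regulator strength per synapse class c(i) = fRegulatorIdx[i]:

   P = \tfrac{1}{2}\sum_i \alpha_{c(i)}\, w_i^2, \qquad \frac{\partial P}{\partial w_i} = \alpha_{c(i)}\, w_i .

P is added to the error when the regulator is active (Result += fPrior above) and the gradient terms fPriorDev[i] are added to each synapse's dE/dw.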

// update of the regulator strengths (one per synapse class) from the current
// inverse Hessian
   GetApproxInvHessian(InvH);
   Int_t numSynapses   = fSynapses->GetEntriesFast();
   Int_t numRegulators = fRegulators.size();
   // ...
   std::vector<Int_t>    nWDP(numRegulators);
   std::vector<Double_t> trace(numRegulators), weightSum(numRegulators);
   for (int i=0;i<numSynapses;i++) {
      // ...
      Int_t idx = fRegulatorIdx[i];
      // ...
      trace[idx] += InvH[i][i];
      gamma      += 1 - fRegulators[idx]*InvH[i][i];
      // ...
   }
   // ...
   if (fEstimator==kMSE) {
      if (GetNEvents() > gamma) variance = CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
      // ...
   }
   // ...
   for (int i=0;i<numRegulators;i++) {
      // ...
      fRegulators[i] = variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
      if (fRegulators[i]<0) fRegulators[i]=0;
      Log() << kDEBUG << "R" << i << ":" << fRegulators[i] << "\t";
   }
   // ...
   Log() << kDEBUG << "\n" << "trainE:" << trainE << "\ttestE:" << testE
         << "\tvariance:" << variance << "\tgamma:" << gamma << Endl;
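
Using the quantities accumulated in the loop (trace[c]: the part of Tr(H^{-1}) belonging to regulator class c; nWDP[c]: the number of weights in that class; weightSum[c]: their summed squares, where the accumulation of nWDP and weightSum sits in elided lines, so that reading is an assumption), the update line corresponds to

   \gamma = \sum_i \bigl(1 - \alpha_{c(i)}\,[H^{-1}]_{ii}\bigr), \qquad \sigma^2 = \frac{E_{\mathrm{train}}}{1 - \gamma/N_{\mathrm{ev}}}, \qquad \alpha_c \;\leftarrow\; \frac{\sigma^2\, N_c}{\sum_{i\in c} w_i^2 + \sigma^2\,\mathrm{Tr}_c(H^{-1})},

which matches the evidence-approximation (MacKay-style) re-estimation of regularisation strengths.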

// GetApproxInvHessian: outer-product approximation of the Hessian from the
// per-event output sensitivities, then inverted
   Int_t numSynapses=fSynapses->GetEntriesFast();
   InvHessian.ResizeTo( numSynapses, numSynapses );

   // ... event loop ...
      double outputValue=GetMvaValue();
      GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
      CalculateNeuronDeltas();
      for (Int_t j = 0; j < numSynapses; j++){
         // ...
         sens[j][0]=sensT[0][j]=synapses->GetDelta();
      }
      if      (fEstimator==kMSE) InvHessian+=sens*sensT;
      else if (fEstimator==kCE)  InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
   // ...
   if (regulate) {
      for (Int_t i = 0; i < numSynapses; i++){
         InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
      }
   }
   else {
      for (Int_t i = 0; i < numSynapses; i++){
         InvHessian[i][i]+=1e-6;  // avoid precision problems that would make the matrix non-invertible
      }
   }

// asymmetric error on the MVA value, propagated from the inverse Hessian
   if (!fCalculateErrors || errLower==0 || errUpper==0)
      // ...

   Double_t MvaUpper,MvaLower,median,variance;
   Int_t numSynapses=fSynapses->GetEntriesFast();
   if (fInvHessian.GetNcols()!=numSynapses) {
      Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
   }
   // ...
   GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
   // ...
   CalculateNeuronDeltas();
   for (Int_t i = 0; i < numSynapses; i++){
      // ...
   }
   // ...
   TMatrixD sig=sensT*fInvHessian*sens;
   // ...
   median=GetOutputNeuron()->GetValue();
   // ...
      Log() << kWARNING << "Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance << Endl;
   // ...
   variance=sqrt(variance);
   // ...
   MvaUpper=fOutput->Eval(median+variance);
   // ...
   *errUpper=MvaUpper-MvaValue;
   // ...
   MvaLower=fOutput->Eval(median-variance);
   // ...
   *errLower=MvaValue-MvaLower;
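
The error band is therefore obtained by linear error propagation through the output transfer function f (fOutput): with s the vector of output sensitivities dy/dw_j taken from the back-propagated deltas, mu the untransformed output (median above) and H^{-1} the stored inverse Hessian,

   \sigma^2 = s^{\top} H^{-1} s, \qquad \mathrm{err}_{\uparrow} = f(\mu + \sigma) - \mathrm{MVA}, \qquad \mathrm{err}_{\downarrow} = \mathrm{MVA} - f(\mu - \sigma),

where sigma is the square root taken in the variance=sqrt(variance) line above.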

#ifdef MethodMLP_UseMinuit__

// MinuitMinimize: minimize the estimator with Minuit, one fit parameter per weight
void TMVA::MethodMLP::MinuitMinimize()
{
   fNumberOfWeights = fSynapses->GetEntriesFast();
   // ...
   for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
      // ...
      tfitter->SetParameter( ipar, parName, w[ipar], 0.1, 0, 0 );
   }
   // ...
   tfitter->SetFCN( &IFCN );
   // ...
}

//______________________________________________________________________________
// IFCN: static Minuit interface, forwards the call to the method instance
   ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );

// call counter and best estimator seen so far (thread-local)
TTHREAD_TLS(Int_t)  nc   = 0;
TTHREAD_TLS(double) minf = 1000000;

// FCN: copy the fit parameters into the synapse weights and evaluate the estimator
   for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
      // ...
   }
   // ...
   f = CalculateEstimator();
   // ...
   if (f < minf) minf = f;
   for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
   // ...
   Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;

// GetHelpMessage: print a short description of the method, its performance
// and the most important configuration options
   Log() << col << "--- Short description:" << colres << Endl;

   Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
   Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
   Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
   Log() << "signal and one background). " << Endl;

   Log() << col << "--- Performance optimisation:" << colres << Endl;

   Log() << "Neural networks are stable and perform well for a large variety of " << Endl;
   Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
   Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
   Log() << "number of input variables that have only little discrimination power. " << Endl;

   Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
   Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
   Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
   Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
   Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
   Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
   Log() << "competitive results)." << Endl;

   Log() << col << "Overtraining: " << colres
         << "only the TMlpANN performs an explicit separation of the" << Endl;
   Log() << "full training sample into independent training and validation samples." << Endl;
   Log() << "We have found that in most high-energy physics applications the " << Endl;
   Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
   Log() << "constrain the weights of the relatively simple architectures required" << Endl;
   Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
   Log() << "the use of validation samples would only reduce the available training" << Endl;
   Log() << "information. However, if the performance on the training sample is " << Endl;
   Log() << "found to be significantly better than the one found with the inde-" << Endl;
   Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
   Log() << "are printed to standard output at the end of each training job." << Endl;

   Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;

   Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
   Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
   Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
   Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
   Log() << "in favour of more neurons in the first hidden layer." << Endl;

   Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
   Log() << "adjustable weights is small compared to the training sample size," << Endl;
   Log() << "using a large number of training samples should not lead to overtraining." << Endl;
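
As a concrete reading of the architecture option: with four input variables, "HiddenLayers=N+1,N" requests two hidden layers of five and four neurons. A sketch of a corresponding booking string (the Factory setup is assumed and the values are only an example):

   // Hypothetical booking call illustrating the architecture and cycle options
   // discussed in the help text; "factory" is an already configured TMVA::Factory*.
   factory->BookMethod( TMVA::Types::kMLP, "MLP",
                        "NCycles=600:HiddenLayers=N+1,N:TrainingMethod=BP:BPMode=sequential:TestRate=5" );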