#ifdef MethodMLP_UseMinuit__
   : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption, theTargetDir ),
     fUseRegulator(false), fCalculateErrors(false),
     fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
     fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
     fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
     fSamplingTraining(false), fSamplingTesting(false),
     fLastAlpha(0.0), fTau(0.),
     fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
     fBPMode(kSequential), fBpModeS("None"),
     fBatchSize(0), fTestRate(0), fEpochMon(false),
     fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
     fGA_SC_rate(0), fGA_SC_factor(0.0),
     fDeviationsFromTargets(0),
     // ...

     fUseRegulator(false), fCalculateErrors(false),
     fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
     fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
     fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
     fSamplingTraining(false), fSamplingTesting(false),
     fLastAlpha(0.0), fTau(0.),
     fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
     fBPMode(kSequential), fBpModeS("None"),
     fBatchSize(0), fTestRate(0), fEpochMon(false),
     fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
     fGA_SC_rate(0), fGA_SC_factor(0.0),
     fDeviationsFromTargets(0),
   SetSignalReferenceCut( 0.5 );

#ifdef MethodMLP_UseMinuit__
   DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
                    "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");

   DeclareOptionRef(fLearnRate=0.02,    "LearningRate",    "ANN learning rate parameter");
   DeclareOptionRef(fDecayRate=0.01,    "DecayRate",       "Decay rate for learning parameter");
   DeclareOptionRef(fTestRate =10,      "TestRate",        "Test for overtraining performed at each #th epochs");
   DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );

   DeclareOptionRef(fSamplingFraction=1.0, "Sampling",      "Only 'Sampling' (randomly selected) events are trained each epoch");
   DeclareOptionRef(fSamplingEpoch=1.0,    "SamplingEpoch", "Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
   DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance",
                    "The sampling weights of events in epochs which were successful (worse estimator than before) are multiplied with SamplingImportance, otherwise they are divided.");
   DeclareOptionRef(fSamplingTraining=kTRUE,  "SamplingTraining", "The training sample is sampled");
   DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting",  "The testing sample is sampled");

   DeclareOptionRef(fResetStep=50,  "ResetStep", "How often BFGS should reset history");
   DeclareOptionRef(fTau      =3.0, "Tau",       "LineSearch \"size step\"");

   DeclareOptionRef(fBpModeS="sequential", "BPMode",
                    "Back-propagation learning mode: sequential or batch");
   AddPreDefVal(TString("sequential"));
   AddPreDefVal(TString("batch"));

   DeclareOptionRef(fBatchSize=-1, "BatchSize",
                    "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");

   DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
                    "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fSteps=-1, "ConvergenceTests",
                    "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
                    "Use regulator to avoid over-training");
   DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
                    "Maximum times of regulator update");
   DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
                    "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");

   DeclareOptionRef(fWeightRange=1.0, "WeightRange",
                    "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
   if (IgnoreEventsWithNegWeightsInTraining()) {
      // ...
            << "Will ignore negative events in training!"
      // ...

   if      (fTrainMethodS == "BP"  ) fTrainingMethod = kBP;
   else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
   else if (fTrainMethodS == "GA"  ) fTrainingMethod = kGA;

   if      (fBpModeS == "sequential") fBPMode = kSequential;
   else if (fBpModeS == "batch")      fBPMode = kBatch;

   if (fBPMode == kBatch) {
      // ...
      if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
   Data()->SetCurrentType(treeType);

   if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
      histS = new TH1F( nameS, nameS, nbin, -limit, limit );
      histB = new TH1F( nameB, nameB, nbin, -limit, limit );
   }

   UInt_t nClasses = DataInfo().GetNClasses();
   UInt_t nTgts    = DataInfo().GetNTargets();

   if (fWeightRange < 1.f) {
      fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
   }
      const Event* ev = GetEvent(i);

      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
      // ...

      ForceNetworkInputs( ev );
      ForceNetworkCalculations();

      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
            v = GetOutputNeuron( itgt )->GetActivationValue();
            // ...
         }
      }
      else if (DoMulticlass()) {
         if (fEstimator == kCE) {
            // ...
            for (UInt_t icls = 0; icls < nClasses; icls++) {
               Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
               norm += exp( activationValue );
               // ...
               d = exp( activationValue );
            }
            // ...
         }
         // ...
         for (UInt_t icls = 0; icls < nClasses; icls++) {
            Double_t desired = (icls == cls) ? 1.0 : 0.0;
            v = GetOutputNeuron( icls )->GetActivationValue();
            d = (desired-v)*(desired-v);
            // ...
         }
      }
      else {
         Double_t desired = DataInfo().IsSignal(ev) ? 1. : 0.;
         v = GetOutputNeuron()->GetActivationValue();
         if (fEstimator == kMSE) d = (desired-v)*(desired-v);
         // ...
      }

      if (fDeviationsFromTargets)
         fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
      if      (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
      else if (histB != 0)                            histB->Fill( float(v), float(w) );
   if (fDeviationsFromTargets) {
      std::sort(fDeviationsFromTargets->begin(), fDeviationsFromTargets->end());

      Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
      // ...
      Float_t weightRangeCut = fWeightRange*sumOfWeights;
      // ...
      for (std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(),
              itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev) {
         float deviation = (*itDev).first;
         float devWeight = (*itDev).second;
         weightSum += devWeight;
         if (weightSum <= weightRangeCut) {
            estimator += devWeight*deviation;
         }
      }
      // ...
      sumOfWeights = sumOfWeightsInRange;
      delete fDeviationsFromTargets;
   }

   if (histS != 0) fEpochMonHistS.push_back( histS );
   if (histB != 0) fEpochMonHistB.push_back( histB );
   if      (DoRegression())  estimator = estimator / Float_t(sumOfWeights);
   else if (DoMulticlass())  estimator = estimator / Float_t(sumOfWeights);
   else                      estimator = estimator / Float_t(sumOfWeights);
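
   // (added note) In all three analysis modes the estimator returned here is the
   // weighted mean of the per-event deviations accumulated above,
   //    estimator = sum_i( w_i * d_i ) / sum_i( w_i ),
   // where the sum of weights may have been restricted to the WeightRange fraction
   // of events with the smallest deviations (see the block above).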
   Data()->SetCurrentType( saveType );

   if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
      CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
   }
      Log() << kFATAL << "ANN Network is not initialized, doing it now!" << Endl;
      SetAnalysisType(GetAnalysisType());
   // ...
   InitializeLearningRates();
   PrintMessage("Training Network");
   // ...
   Int_t nSynapses = fSynapses->GetEntriesFast();
   if (nSynapses > nEvents)
      Log() << kWARNING << "ANN too complicated: #events=" << nEvents << "\t#synapses=" << nSynapses << Endl;
#ifdef MethodMLP_UseMinuit__
   if (useMinuit) MinuitMinimize();
#else
   if      (fTrainingMethod == kGA)   GeneticMinimize();
   else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
   else                               BackPropagationMinimize(nEpochs);
#endif
      Log() << kINFO << "Finalizing handling of Regulator terms, trainE=" << trainE << " testE=" << testE << Endl;
      // ...
      Log() << kINFO << "Done with handling of Regulator terms" << Endl;

   if (fCalculateErrors || fUseRegulator) {
      Int_t numSynapses = fSynapses->GetEntriesFast();
      fInvHessian.ResizeTo(numSynapses, numSynapses);
      GetApproxInvHessian( fInvHessian, false );
   }
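
   // (added summary) BFGSMinimize below proceeds, per epoch, roughly as follows:
   //   1. optionally re-sample the training/testing events (Sampling* options);
   //   2. compute the gradient dE/dw of the estimator for all synapses and form the
   //      Gamma/Delta vectors (SetGammaDelta);
   //   3. build the approximate Hessian from that history (GetHessian) and take the
   //      quasi-Newton direction (SetDir), falling back to the steepest-descent
   //      direction when the update fails or when DerivDir(Dir) > 0, i.e. the
   //      proposed direction is not a descent direction;
   //   4. step along the direction with LineSearch, which also fills `buffer` with
   //      the applied weight changes;
   //   5. every TestRate epochs evaluate the train/test estimators, fill the
   //      monitoring histograms and run the convergence test; with UseRegulator the
   //      regulators are re-estimated at most UpdateLimit times.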
   Timer timer( (fSteps>0 ? 100 : nEpochs), GetName() );

   // ...
   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate + Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate + Int_t(fTestRate/2) );
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;

   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
   }

   std::vector<Double_t> buffer( nWeights );
   for (Int_t i = 0; i < nWeights; i++) buffer[i] = 0.;

   // ...
   TMatrixD Hessian ( nWeights, nWeights );
   // ...
   Int_t RegUpdateTimes = 0;
   // ...
   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0, 1.0, fRandomSeed);
   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   for (Int_t i = 0; i < nEpochs; i++) {
      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               // ...
               Data()->InitSampling(fSamplingFraction, fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               // ...
               Data()->InitSampling(fSamplingFraction, fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         // ...
         Data()->InitSampling(1.0, 1.0);
         // ...
         Data()->InitSampling(1.0, 1.0);
      }
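
      // (added note) During the first fSamplingEpoch fraction of the epochs only a
      // randomly chosen fSamplingFraction of the events is used, re-drawn every
      // fTestRate epochs; afterwards InitSampling(1.0, 1.0) restores the full
      // training/testing samples.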
      SetGammaDelta( Gamma, Delta, buffer );

      if (i % fResetStep == 0 && i < 0.5*nEpochs) {
         // ...
      }
      // ...
      if (GetHessian( Hessian, Gamma, Delta )) {
         // ...
      }
      else SetDir( Hessian, Dir );
      // ...
      if (DerivDir( Dir ) > 0) {
         // ...
      }
      if (LineSearch( Dir, buffer, &dError )) {
         // ...
         if (LineSearch( Dir, buffer, &dError )) {
            Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
      if (fUseRegulator && RegUpdateTimes < fUpdateLimit && RegUpdateCD >= 5 && fabs(dError) < 0.1*AccuError) {
         Log() << kDEBUG << "\n\nUpdate regulators " << RegUpdateTimes << " on epoch " << i
               << "\tdError=" << dError << Endl;
      if ((i+1)%fTestRate == 0) {
         // ...
         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );
         // ...
         if ((testE < GetCurrentValue()) || (GetCurrentValue() < 1e-100)) {
            // ...
         }
         Data()->EventResult( success );
         // ...
         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               // ...
               ResetConvergenceCounter();
            }
      TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE, i );
      // ...
      if (Float_t(i)/nEpochs < fSamplingEpoch)
         progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
      else
         progress = 100.0*(fSamplingFraction*fSamplingEpoch + (1.0-fSamplingEpoch)*Progress());

      Float_t progress2 = 100.0*RegUpdateTimes/fUpdateLimit;
      if (progress2 > progress) progress = progress2;
      // ...
      if (progress < i) progress = i;
   Int_t nWeights = fSynapses->GetEntriesFast();
   // ...
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      Gamma[IDX++][0] = -synapse->GetDEDw();
   }

   for (Int_t i = 0; i < nWeights; i++) Delta[i][0] = buffer[i];

   // ...
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      Gamma[IDX++][0] += synapse->GetDEDw();
   }
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
   }

   // ...
      const Event* ev = GetEvent(i);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
      // ...

      for (Int_t j = 0; j < nSynapses; j++) {
         // ...
      }

   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      if (fUseRegulator) DEDw += fPriorDev[i];
      synapse->SetDEDw( DEDw / nPosEvents );
   }
   ForceNetworkInputs( ev );
   ForceNetworkCalculations();

   if (DoRegression()) {
      UInt_t ntgt = DataInfo().GetNTargets();
      for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
         // ...
         Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( itgt )->SetError(error);
      }
   }
   else if (DoMulticlass()) {
      UInt_t nClasses = DataInfo().GetNClasses();
      // ...
      for (UInt_t icls = 0; icls < nClasses; icls++) {
         Double_t desired = ( cls==icls ? 1.0 : 0.0 );
         Double_t error   = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( icls )->SetError(error);
      }
   }
   else {
      Double_t desired = GetDesiredOutput( ev );
      // ...
      if      (fEstimator == kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;
      else if (fEstimator == kCE)  error = -eventWeight/(GetOutputNeuron()->GetActivationValue() - 1 + desired);
      GetOutputNeuron()->SetError(error);
   }

   CalculateNeuronDeltas();
   for (Int_t j = 0; j < fSynapses->GetEntriesFast(); j++) {
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      Dir[IDX++][0] = -synapse->GetDEDw();
   }

   // ...
   Int_t nSynapses = fSynapses->GetEntriesFast();
   // ...
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      DEDw[IDX++][0] = synapse->GetDEDw();
   }

   dir = Hessian * DEDw;
   for (Int_t i = 0; i < IDX; i++) dir[i][0] = -dir[i][0];
   Int_t nSynapses = fSynapses->GetEntriesFast();
   // ...
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      Result += Dir[IDX++][0] * synapse->GetDEDw();
   }
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;

   std::vector<Double_t> Origin(nWeights);
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
   }

   // ...
   if      (alpha2 < 0.01) alpha2 = 0.01;
   else if (alpha2 > 2.0)  alpha2 = 2.0;
   // ...
   SetDirWeights( Origin, Dir, alpha2 );
   // ...
   for (Int_t i = 0; i < 100; i++) {
      // ...
      SetDirWeights( Origin, Dir, alpha3 );
      // ...
   }
   // ...
   SetDirWeights( Origin, Dir, 0. );
   for (Int_t i = 0; i < 100; i++) {
      // ...
   }

   // ...
   Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
   alpha2 = -alpha_original;
   // ...
   SetDirWeights( Origin, Dir, alpha2 );
   // ...
   SetDirWeights( Origin, Dir, 0. );
   Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
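
   // (added note) The block below fits a parabola through the three probed points
   // (alpha1, err1), (alpha2, err2), (alpha3, err3) and jumps to its minimum; this is
   // the standard quadratic-interpolation step of a line search. fLastAlpha is capped
   // at 10000 before the weights are finally set to Origin + fLastAlpha*Dir.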
   if (alpha1 > 0 && alpha2 > 0 && alpha3 > 0) {
      fLastAlpha = 0.5 * (alpha1 + alpha3 -
                          (err3 - err1) / ((err3 - err2) / (alpha3 - alpha2)
                                           - (err2 - err1) / (alpha2 - alpha1)));
   }
   // ...
   fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;

   SetDirWeights( Origin, Dir, fLastAlpha );
   if (finalError > err1) {
      Log() << kWARNING << "Line search increased error! Something is wrong."
            << " fLastAlpha=" << fLastAlpha << " alpha123=" << alpha1 << " "
            << alpha2 << " " << alpha3 << " err1=" << err1 << " errfinal=" << finalError << Endl;
   }
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      buffer[IDX] = synapse->GetWeight() - Origin[IDX];
      // ...
   }

   if (dError) (*dError) = (errOrigin - finalError)/finalError;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
      // ...
   }
   if (fUseRegulator) UpdatePriors();
   UInt_t ntgts = GetNTargets();
   // ...
      const Event* ev = GetEvent(i);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
      // ...

      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
            error += GetMSEErr( ev, itgt );
         }
      }
      else if (DoMulticlass()) {
         for (UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++) {
            error += GetMSEErr( ev, icls );
         }
      }
      else {
         if      (fEstimator == kMSE) error = GetMSEErr( ev );
         else if (fEstimator == kCE)  error = GetCEErr( ev );
      }
   // ...
   if (fUseRegulator) Result += fPrior;
   if (Result < 0) Log() << kWARNING << "\nNegative Error!!! :" << Result-fPrior << "+" << fPrior << Endl;
   if      (DoRegression())  target = ev->GetTarget( index );
   else if (DoMulticlass())  target = (ev->GetClass() == index ? 1.0 : 0.0);
   else                      target = GetDesiredOutput( ev );

   error = 0.5*(output-target)*(output-target);

   // ...
   if      (DoRegression())  target = ev->GetTarget( index );
   else if (DoMulticlass())  target = (ev->GetClass() == index ? 1.0 : 0.0);
   else                      target = GetDesiredOutput( ev );
   Timer timer( (fSteps>0 ? 100 : nEpochs), GetName() );

   // ...
   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate + Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate + Int_t(fTestRate/2) );

   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0, 1.0, fRandomSeed);

   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   for (Int_t i = 0; i < nEpochs; i++) {
      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               // ...
               Data()->InitSampling(fSamplingFraction, fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               // ...
               Data()->InitSampling(fSamplingFraction, fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         // ...
         Data()->InitSampling(1.0, 1.0);
         // ...
         Data()->InitSampling(1.0, 1.0);
      }

      // ...
      DecaySynapseWeights(i >= lateEpoch);
      if ((i+1)%fTestRate == 0) {
         // ...
         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );
         // ...
         if ((testE < GetCurrentValue()) || (GetCurrentValue() < 1e-100)) {
            // ...
         }
         Data()->EventResult( success );
         // ...
         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               // ...
               ResetConvergenceCounter();
            }
            // ...
            if (lateEpoch > i) lateEpoch = i;
      TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
      // ...
      if (Float_t(i)/nEpochs < fSamplingEpoch)
         progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
      else
         progress = 100*(fSamplingEpoch*fSamplingFraction + (1.0-fSamplingFraction*fSamplingEpoch)*Progress());
   Shuffle(index, nEvents);

   // ...
      const Event* ev = GetEvent(index[i]);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
      // ...

      TrainOneEvent(index[i]);

      if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
         AdjustSynapseWeights();
         if (fgPRINT_BATCH) {
   for (Int_t i = 0; i < n; i++) {
      j = (Int_t) (frgen->Rndm() * a);
      // ...
      index[j] = index[i];
      // ...
   }
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
   if (type == 0) desired = fOutput->GetMin();
   else           desired = fOutput->GetMax();

   // ...
   for (UInt_t j = 0; j < GetNvar(); j++) {
      // ...
      neuron = GetInputNeuron(j);
      // ...
   }

   ForceNetworkCalculations();
   UpdateNetwork(desired, eventWeight);
   const Event* ev = GetEvent(ievt);
   // ...
   ForceNetworkInputs( ev );
   ForceNetworkCalculations();
   if (DoRegression())  UpdateNetwork( ev->GetTargets(), eventWeight );
   if (DoMulticlass())  UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
   else                 UpdateNetwork( GetDesiredOutput( ev ), eventWeight );

   // ...
   return DataInfo().IsSignal(ev) ? fOutput->GetMax() : fOutput->GetMin();
   Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
   if      (fEstimator == kMSE) error = GetOutputNeuron()->GetActivationValue() - desired;
   else if (fEstimator == kCE)  error = -1./(GetOutputNeuron()->GetActivationValue() - 1 + desired);
   else    Log() << kFATAL << "Estimator type unspecified!!" << Endl;
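   // (added note) For desired in {0,1} the kCE branch above is the derivative of the
   // cross-entropy  E = -[ desired*ln(y) + (1-desired)*ln(1-y) ]  with respect to the
   // output y:  dE/dy = -1/(y - 1 + desired).  The kMSE branch uses dE/dy = y - desired
   // (the event weight is applied below).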
   error *= eventWeight;
   GetOutputNeuron()->SetError(error);
   CalculateNeuronDeltas();
   for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
      Double_t error = GetOutputNeuron( i )->GetActivationValue() - desired.at(i);
      error *= eventWeight;
      GetOutputNeuron( i )->SetError(error);
   }
   CalculateNeuronDeltas();
   Int_t numLayers = fNetwork->GetEntriesFast();

   // ...
   for (Int_t i = numLayers-1; i >= 0; i--) {
      // ...
      for (Int_t j = 0; j < numNeurons; j++) {
   PrintMessage("Minimizing Estimator with GA");

   // ...
   fGA_SC_factor = 0.95;
   // ...
   std::vector<Interval*> ranges;
   Int_t numWeights = fSynapses->GetEntriesFast();
   for (Int_t ivar = 0; ivar < numWeights; ivar++) {
      ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ) );
   }

   // ...
   Double_t estimator = CalculateEstimator();
   Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
   return ComputeEstimator( parameters );
   Int_t numSynapses = fSynapses->GetEntriesFast();
   // ...
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      // ...
   }
   if (fUseRegulator) UpdatePriors();

   Double_t estimator = CalculateEstimator();
   for (Int_t i = 0; i < numLayers; i++) {
      // ...
      for (Int_t j = 0; j < numNeurons; j++) {
         // ...
      }
   }

   // ...
   for (Int_t i = numLayers-1; i >= 0; i--) {
      // ...
      for (Int_t j = 0; j < numNeurons; j++) {
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < nSynapses; i++) {
      // ...
      fPrior    += 0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
      fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
   }
   GetApproxInvHessian(InvH);

   Int_t numSynapses   = fSynapses->GetEntriesFast();
   Int_t numRegulators = fRegulators.size();
   // ...
   std::vector<Int_t>    nWDP(numRegulators);
   std::vector<Double_t> trace(numRegulators), weightSum(numRegulators);
   for (int i = 0; i < numSynapses; i++) {
      // ...
      Int_t idx = fRegulatorIdx[i];
      // ...
      trace[idx] += InvH[i][i];
      gamma      += 1 - fRegulators[idx]*InvH[i][i];
      // ...
   }

   if (fEstimator == kMSE) {
      if (GetNEvents() > gamma) variance = CalculateEstimator( Types::kTraining, 0 )/(1 - (gamma/GetNEvents()));
      // ...
   }

   // ...
   for (int i = 0; i < numRegulators; i++) {
      // ...
      fRegulators[i] = variance*nWDP[i]/(weightSum[i] + variance*trace[i]);
      if (fRegulators[i] < 0) fRegulators[i] = 0;
      Log() << kDEBUG << "R" << i << ":" << fRegulators[i] << "\t";
   }
   // ...
   Log() << kDEBUG << "\n" << "trainE:" << trainE << "\ttestE:" << testE
         << "\tvariance:" << variance << "\tgamma:" << gamma << Endl;
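
   // (added note) GetApproxInvHessian below accumulates, event by event, the outer
   // product sens*sensT of the per-synapse deltas (scaled by outputValue*(1-outputValue)
   // for the kCE estimator), i.e. a Gauss-Newton-style approximation of the Hessian.
   // If regulators are used their strengths are added to the diagonal, plus a small
   // 1e-6 term for numerical stability; the resulting matrix is then inverted and used,
   // e.g., for the MVA error estimate via sensT*fInvHessian*sens.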
   Int_t numSynapses = fSynapses->GetEntriesFast();
   InvHessian.ResizeTo( numSynapses, numSynapses );

   // ...
      double outputValue = GetMvaValue();
      GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
      CalculateNeuronDeltas();
      for (Int_t j = 0; j < numSynapses; j++) {
         // ...
         sens[j][0] = sensT[0][j] = synapses->GetDelta();
      }
      if      (fEstimator == kMSE) InvHessian += sens*sensT;
      else if (fEstimator == kCE)  InvHessian += (outputValue*(1-outputValue))*sens*sensT;
   // ...

   for (Int_t i = 0; i < numSynapses; i++) {
      InvHessian[i][i] += fRegulators[fRegulatorIdx[i]];
   }
   // ...
   for (Int_t i = 0; i < numSynapses; i++) {
      InvHessian[i][i] += 1e-6;
   }
   if (!fCalculateErrors || errLower==0 || errUpper==0)
      // ...

   Double_t MvaUpper, MvaLower, median, variance;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   if (fInvHessian.GetNcols() != numSynapses) {
      Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
   }
   // ...
   GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
   // ...
   CalculateNeuronDeltas();
   for (Int_t i = 0; i < numSynapses; i++) {
      // ...
   }
   // ...
   TMatrixD sig = sensT*fInvHessian*sens;
   // ...
   median = GetOutputNeuron()->GetValue();

   // ...
      Log() << kWARNING << "Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance << Endl;
   // ...
   variance = sqrt(variance);

   MvaUpper = fOutput->Eval(median+variance);
   // ...
   *errUpper = MvaUpper - MvaValue;

   MvaLower = fOutput->Eval(median-variance);
   // ...
   *errLower = MvaValue - MvaLower;
#ifdef MethodMLP_UseMinuit__

void TMVA::MethodMLP::MinuitMinimize()
{
   fNumberOfWeights = fSynapses->GetEntriesFast();
   // ...
   for (Int_t ipar = 0; ipar < fNumberOfWeights; ipar++) {
      // ...
                             parName, w[ipar], 0.1, 0, 0 );
   }
   // ...
   tfitter->SetFCN( &IFCN );
//_____________________________________________________________________________
   ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );

   // ...
   TTHREAD_TLS(Int_t)  nc   = 0;
   TTHREAD_TLS(double) minf = 1000000;

   // ...
   for (Int_t ipar = 0; ipar < fNumberOfWeights; ipar++) {
      // ...
   }
   // ...
   f = CalculateEstimator();

   // ...
   if (f < minf) minf = f;
   for (Int_t ipar = 0; ipar < fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
   Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
   Log() << col << "--- Short description:" << colres << Endl;
   Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
   Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
   Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
   Log() << "signal and one background). " << Endl;
   Log() << col << "--- Performance optimisation:" << colres << Endl;
   Log() << "Neural networks are stable and perform well for a large variety of " << Endl;
   Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
   Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
   Log() << "number of input variables that have only little discrimination power. " << Endl;
   Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
   Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
   Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
   Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
   Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
   Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
   Log() << "competitive results)." << Endl;
   Log() << col << "Overtraining: " << colres
         << "only the TMlpANN performs an explicit separation of the" << Endl;
   Log() << "full training sample into independent training and validation samples." << Endl;
   Log() << "We have found that in most high-energy physics applications the " << Endl;
   Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
   Log() << "constrain the weights of the relatively simple architectures required" << Endl;
   Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
   Log() << "the use of validation samples would only reduce the available training" << Endl;
   Log() << "information. However, if the performance on the training sample is " << Endl;
   Log() << "found to be significantly better than the one found with the inde-" << Endl;
   Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
   Log() << "are printed to standard output at the end of each training job." << Endl;
   Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
   Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
   Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
   Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
   Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
   Log() << "in favour of more neurons in the first hidden layer." << Endl;
   Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
   Log() << "adjustable weights is small compared to the training sample size," << Endl;
   Log() << "using a large number of training samples should not lead to overtraining." << Endl;
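
   // (added illustration) A hypothetical option-string fragment following the guidance
   // above, where "N" is expanded by TMVA to the number of input variables:
   //
   //    "NCycles=600:HiddenLayers=N+1,N:TestRate=5"
   //
   // i.e. 600 training cycles and two hidden layers with N+1 and N neurons.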