78#ifdef MethodMLP_UseMinuit__
97 fUseRegulator(false), fCalculateErrors(false),
98 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
99 fTrainingMethod(kBFGS), fTrainMethodS(
"BFGS"),
100 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
101 fSamplingTraining(false), fSamplingTesting(false),
102 fLastAlpha(0.0), fTau(0.),
103 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
104 fBPMode(kSequential), fBpModeS(
"None"),
105 fBatchSize(0), fTestRate(0), fEpochMon(false),
106 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
107 fGA_SC_rate(0), fGA_SC_factor(0.0),
108 fDeviationsFromTargets(0),
120 fUseRegulator(false), fCalculateErrors(false),
121 fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
122 fTrainingMethod(kBFGS), fTrainMethodS(
"BFGS"),
123 fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
124 fSamplingTraining(false), fSamplingTesting(false),
125 fLastAlpha(0.0), fTau(0.),
126 fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
127 fBPMode(kSequential), fBpModeS(
"None"),
128 fBatchSize(0), fTestRate(0), fEpochMon(false),
129 fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
130 fGA_SC_rate(0), fGA_SC_factor(0.0),
131 fDeviationsFromTargets(0),
169 SetSignalReferenceCut( 0.5 );
170#ifdef MethodMLP_UseMinuit__
199 DeclareOptionRef(fTrainMethodS=
"BP",
"TrainingMethod",
200 "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
205 DeclareOptionRef(fLearnRate=0.02,
"LearningRate",
"ANN learning rate parameter");
206 DeclareOptionRef(fDecayRate=0.01,
"DecayRate",
"Decay rate for learning parameter");
207 DeclareOptionRef(fTestRate =10,
"TestRate",
"Test for overtraining performed at each #th epochs");
208 DeclareOptionRef(fEpochMon =
kFALSE,
"EpochMonitoring",
"Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
210 DeclareOptionRef(fSamplingFraction=1.0,
"Sampling",
"Only 'Sampling' (randomly selected) events are trained each epoch");
211 DeclareOptionRef(fSamplingEpoch=1.0,
"SamplingEpoch",
"Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
212 DeclareOptionRef(fSamplingWeight=1.0,
"SamplingImportance",
" The sampling weights of events in epochs which successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
214 DeclareOptionRef(fSamplingTraining=
kTRUE,
"SamplingTraining",
"The training sample is sampled");
215 DeclareOptionRef(fSamplingTesting=
kFALSE,
"SamplingTesting" ,
"The testing sample is sampled");
217 DeclareOptionRef(fResetStep=50,
"ResetStep",
"How often BFGS should reset history");
218 DeclareOptionRef(fTau =3.0,
"Tau",
"LineSearch \"size step\"");
220 DeclareOptionRef(fBpModeS=
"sequential",
"BPMode",
221 "Back-propagation learning mode: sequential or batch");
222 AddPreDefVal(
TString(
"sequential"));
223 AddPreDefVal(
TString(
"batch"));
225 DeclareOptionRef(fBatchSize=-1,
"BatchSize",
226 "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
228 DeclareOptionRef(fImprovement=1
e-30,
"ConvergenceImprove",
229 "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
231 DeclareOptionRef(fSteps=-1,
"ConvergenceTests",
232 "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
234 DeclareOptionRef(fUseRegulator=
kFALSE,
"UseRegulator",
235 "Use regulator to avoid over-training");
236 DeclareOptionRef(fUpdateLimit=10000,
"UpdateLimit",
237 "Maximum times of regulator update");
238 DeclareOptionRef(fCalculateErrors=
kFALSE,
"CalculateErrors",
239 "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");
241 DeclareOptionRef(fWeightRange=1.0,
"WeightRange",
242 "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
254 if (IgnoreEventsWithNegWeightsInTraining()) {
256 <<
"Will ignore negative events in training!"
261 if (fTrainMethodS ==
"BP" ) fTrainingMethod = kBP;
262 else if (fTrainMethodS ==
"BFGS") fTrainingMethod = kBFGS;
263 else if (fTrainMethodS ==
"GA" ) fTrainingMethod = kGA;
265 if (fBpModeS ==
"sequential") fBPMode = kSequential;
266 else if (fBpModeS ==
"batch") fBPMode = kBatch;
270 if (fBPMode == kBatch) {
272 Int_t numEvents = Data()->GetNEvents();
273 if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
282 Log() << kDEBUG <<
"Initialize learning rates" <<
Endl;
284 Int_t numSynapses = fSynapses->GetEntriesFast();
285 for (
Int_t i = 0; i < numSynapses; i++) {
286 synapse = (
TSynapse*)fSynapses->At(i);
298 Log() << kFATAL <<
"<CalculateEstimator> fatal error: wrong tree type: " << treeType <<
Endl;
302 Data()->SetCurrentType(treeType);
313 if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
314 histS =
new TH1F( nameS, nameS, nbin, -limit, limit );
315 histB =
new TH1F( nameB, nameB, nbin, -limit, limit );
321 Int_t nEvents = GetNEvents();
322 UInt_t nClasses = DataInfo().GetNClasses();
323 UInt_t nTgts = DataInfo().GetNTargets();
327 if( fWeightRange < 1.f ){
328 fDeviationsFromTargets =
new std::vector<std::pair<Float_t,Float_t> >(nEvents);
331 for (
Int_t i = 0; i < nEvents; i++) {
333 const Event* ev = GetEvent(i);
335 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
342 ForceNetworkInputs( ev );
343 ForceNetworkCalculations();
346 if (DoRegression()) {
347 for (
UInt_t itgt = 0; itgt < nTgts; itgt++) {
348 v = GetOutputNeuron( itgt )->GetActivationValue();
354 }
else if (DoMulticlass() ) {
356 if (fEstimator==kCE){
358 for (
UInt_t icls = 0; icls < nClasses; icls++) {
359 Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
360 norm +=
exp( activationValue );
362 d =
exp( activationValue );
367 for (
UInt_t icls = 0; icls < nClasses; icls++) {
368 Double_t desired = (icls==cls) ? 1.0 : 0.0;
369 v = GetOutputNeuron( icls )->GetActivationValue();
370 d = (desired-
v)*(desired-
v);
375 Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
376 v = GetOutputNeuron()->GetActivationValue();
377 if (fEstimator==kMSE)
d = (desired-
v)*(desired-
v);
382 if( fDeviationsFromTargets )
383 fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(
d,w));
389 if (DataInfo().IsSignal(ev) && histS != 0) histS->
Fill(
float(
v), float(w) );
390 else if (histB != 0) histB->
Fill(
float(
v), float(w) );
394 if( fDeviationsFromTargets ) {
395 std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
397 Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
400 Float_t weightRangeCut = fWeightRange*sumOfWeights;
402 for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
403 float deviation = (*itDev).first;
404 float devWeight = (*itDev).second;
405 weightSum += devWeight;
406 if( weightSum <= weightRangeCut ) {
407 estimator += devWeight*deviation;
411 sumOfWeights = sumOfWeightsInRange;
412 delete fDeviationsFromTargets;
415 if (histS != 0) fEpochMonHistS.push_back( histS );
416 if (histB != 0) fEpochMonHistB.push_back( histB );
421 estimator = estimator/
Float_t(sumOfWeights);
426 Data()->SetCurrentType( saveType );
429 if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType ==
Types::kTraining) {
430 CreateWeightMonitoringHists(
Form(
"epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
442 Log() << kFATAL <<
"ANN Network is not initialized, doing it now!"<<
Endl;
443 SetAnalysisType(GetAnalysisType());
445 Log() << kDEBUG <<
"reinitialize learning rates" <<
Endl;
446 InitializeLearningRates();
448 PrintMessage(
"Training Network");
450 Int_t nEvents=GetNEvents();
451 Int_t nSynapses=fSynapses->GetEntriesFast();
452 if (nSynapses>nEvents)
453 Log()<<kWARNING<<
"ANN too complicated: #events="<<nEvents<<
"\t#synapses="<<nSynapses<<
Endl;
455 fIPyMaxIter = nEpochs;
456 if (fInteractive && fInteractive->NotInitialized()){
457 std::vector<TString> titles = {
"Error on training set",
"Error on test set"};
458 fInteractive->Init(titles);
461#ifdef MethodMLP_UseMinuit__
462 if (useMinuit) MinuitMinimize();
464 if (fTrainingMethod == kGA) GeneticMinimize();
465 else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
466 else BackPropagationMinimize(nEpochs);
472 Log()<<kINFO<<
"Finalizing handling of Regulator terms, trainE="<<trainE<<
" testE="<<testE<<
Endl;
474 Log()<<kINFO<<
"Done with handling of Regulator terms"<<
Endl;
477 if( fCalculateErrors || fUseRegulator )
479 Int_t numSynapses=fSynapses->GetEntriesFast();
480 fInvHessian.ResizeTo(numSynapses,numSynapses);
481 GetApproxInvHessian( fInvHessian ,
false);
491 Timer timer( (fSteps>0?100:nEpochs), GetName() );
497 fEstimatorHistTrain =
new TH1F(
"estimatorHistTrain",
"training estimator",
498 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
499 fEstimatorHistTest =
new TH1F(
"estimatorHistTest",
"test estimator",
500 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
503 Int_t nSynapses = fSynapses->GetEntriesFast();
504 Int_t nWeights = nSynapses;
506 for (
Int_t i=0;i<nSynapses;i++) {
511 std::vector<Double_t> buffer( nWeights );
512 for (
Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
515 TMatrixD Hessian ( nWeights, nWeights );
519 Int_t RegUpdateTimes=0;
527 if(fSamplingTraining || fSamplingTesting)
528 Data()->InitSampling(1.0,1.0,fRandomSeed);
530 if (fSteps > 0)
Log() << kINFO <<
"Inaccurate progress timing for MLP... " <<
Endl;
534 for (
Int_t i = 0; i < nEpochs; i++) {
536 if (fExitFromTraining)
break;
538 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
539 if ((i+1)%fTestRate == 0 || (i == 0)) {
540 if (fSamplingTraining) {
542 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
543 Data()->CreateSampling();
545 if (fSamplingTesting) {
547 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
548 Data()->CreateSampling();
554 Data()->InitSampling(1.0,1.0);
556 Data()->InitSampling(1.0,1.0);
567 SetGammaDelta(
Gamma, Delta, buffer );
569 if (i % fResetStep == 0 && i<0.5*nEpochs) {
575 if (GetHessian( Hessian,
Gamma, Delta )) {
580 else SetDir( Hessian, Dir );
584 if (DerivDir( Dir ) > 0) {
589 if (LineSearch( Dir, buffer, &dError )) {
593 if (LineSearch(Dir, buffer, &dError)) {
595 Log() << kFATAL <<
"Line search failed! Huge troubles somewhere..." <<
Endl;
600 if (dError<0)
Log()<<kWARNING<<
"\nnegative dError=" <<dError<<
Endl;
603 if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 &&
fabs(dError)<0.1*AccuError) {
604 Log()<<kDEBUG<<
"\n\nUpdate regulators "<<RegUpdateTimes<<
" on epoch "<<i<<
"\tdError="<<dError<<
Endl;
614 if ((i+1)%fTestRate == 0) {
619 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
622 fEstimatorHistTrain->Fill( i+1, trainE );
623 fEstimatorHistTest ->Fill( i+1, testE );
626 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1
e-100)) {
629 Data()->EventResult( success );
631 SetCurrentValue( testE );
632 if (HasConverged()) {
633 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
634 Int_t newEpoch =
Int_t(fSamplingEpoch*nEpochs);
636 ResetConvergenceCounter();
643 TString convText =
Form(
"<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i );
646 if (
Float_t(i)/nEpochs < fSamplingEpoch)
648 progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
652 progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
654 Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit;
655 if (progress2>progress) progress=progress2;
660 if (progress<i) progress=i;
676 Int_t nWeights = fSynapses->GetEntriesFast();
679 Int_t nSynapses = fSynapses->GetEntriesFast();
680 for (
Int_t i=0;i<nSynapses;i++) {
685 for (
Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
690 for (
Int_t i=0;i<nSynapses;i++)
701 Int_t nSynapses = fSynapses->GetEntriesFast();
702 for (
Int_t i=0;i<nSynapses;i++) {
707 Int_t nEvents = GetNEvents();
708 Int_t nPosEvents = nEvents;
709 for (
Int_t i=0;i<nEvents;i++) {
711 const Event* ev = GetEvent(i);
712 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
720 for (
Int_t j=0;j<nSynapses;j++) {
726 for (
Int_t i=0;i<nSynapses;i++) {
729 if (fUseRegulator) DEDw+=fPriorDev[i];
730 synapse->
SetDEDw( DEDw / nPosEvents );
740 ForceNetworkInputs( ev );
741 ForceNetworkCalculations();
743 if (DoRegression()) {
744 UInt_t ntgt = DataInfo().GetNTargets();
745 for (
UInt_t itgt = 0; itgt < ntgt; itgt++) {
747 Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
748 GetOutputNeuron( itgt )->SetError(error);
750 }
else if (DoMulticlass()) {
751 UInt_t nClasses = DataInfo().GetNClasses();
753 for (
UInt_t icls = 0; icls < nClasses; icls++) {
754 Double_t desired = ( cls==icls ? 1.0 : 0.0 );
755 Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
756 GetOutputNeuron( icls )->SetError(error);
759 Double_t desired = GetDesiredOutput( ev );
761 if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;
762 else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired);
763 GetOutputNeuron()->SetError(error);
766 CalculateNeuronDeltas();
767 for (
Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
779 Int_t nSynapses = fSynapses->GetEntriesFast();
781 for (
Int_t i=0;i<nSynapses;i++) {
783 Dir[IDX++][0] = -synapse->
GetDEDw();
813 Int_t nSynapses = fSynapses->GetEntriesFast();
816 for (
Int_t i=0;i<nSynapses;i++) {
818 DEDw[IDX++][0] = synapse->
GetDEDw();
821 dir = Hessian * DEDw;
822 for (
Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
830 Int_t nSynapses = fSynapses->GetEntriesFast();
833 for (
Int_t i=0;i<nSynapses;i++) {
835 Result += Dir[IDX++][0] * synapse->
GetDEDw();
845 Int_t nSynapses = fSynapses->GetEntriesFast();
846 Int_t nWeights = nSynapses;
848 std::vector<Double_t> Origin(nWeights);
849 for (
Int_t i=0;i<nSynapses;i++) {
860 if (alpha2 < 0.01) alpha2 = 0.01;
861 else if (alpha2 > 2.0) alpha2 = 2.0;
865 SetDirWeights( Origin, Dir, alpha2 );
873 for (
Int_t i=0;i<100;i++) {
875 SetDirWeights(Origin, Dir, alpha3);
887 SetDirWeights(Origin, Dir, 0.);
892 for (
Int_t i=0;i<100;i++) {
895 Log() << kWARNING <<
"linesearch, starting to investigate direction opposite of steepestDIR" <<
Endl;
896 alpha2 = -alpha_original;
898 SetDirWeights(Origin, Dir, alpha2);
908 SetDirWeights(Origin, Dir, 0.);
909 Log() << kWARNING <<
"linesearch, failed even in opposite direction of steepestDIR" <<
Endl;
915 if (alpha1>0 && alpha2>0 && alpha3 > 0) {
916 fLastAlpha = 0.5 * (alpha1 + alpha3 -
917 (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
918 - ( err2 - err1 ) / (alpha2 - alpha1 )));
924 fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
926 SetDirWeights(Origin, Dir, fLastAlpha);
933 if (finalError > err1) {
934 Log() << kWARNING <<
"Line search increased error! Something is wrong."
935 <<
"fLastAlpha=" << fLastAlpha <<
"al123=" << alpha1 <<
" "
936 << alpha2 <<
" " << alpha3 <<
" err1="<< err1 <<
" errfinal=" << finalError <<
Endl;
939 for (
Int_t i=0;i<nSynapses;i++) {
941 buffer[IDX] = synapse->
GetWeight() - Origin[IDX];
945 if (dError) (*dError)=(errOrigin-finalError)/finalError;
955 Int_t nSynapses = fSynapses->GetEntriesFast();
957 for (
Int_t i=0;i<nSynapses;i++) {
959 synapse->
SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
962 if (fUseRegulator) UpdatePriors();
970 Int_t nEvents = GetNEvents();
971 UInt_t ntgts = GetNTargets();
974 for (
Int_t i=0;i<nEvents;i++) {
975 const Event* ev = GetEvent(i);
977 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
984 if (DoRegression()) {
985 for (
UInt_t itgt = 0; itgt < ntgts; itgt++) {
986 error += GetMSEErr( ev, itgt );
988 }
else if ( DoMulticlass() ){
989 for(
UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
990 error += GetMSEErr( ev, icls );
993 if (fEstimator==kMSE) error = GetMSEErr( ev );
994 else if (fEstimator==kCE) error= GetCEErr( ev );
998 if (fUseRegulator) Result+=fPrior;
999 if (Result<0)
Log()<<kWARNING<<
"\nNegative Error!!! :"<<Result-fPrior<<
"+"<<fPrior<<
Endl;
1008 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1010 if (DoRegression()) target = ev->
GetTarget( index );
1011 else if (DoMulticlass()) target = (ev->
GetClass() == index ? 1.0 : 0.0 );
1012 else target = GetDesiredOutput( ev );
1025 Double_t output = GetOutputNeuron( index )->GetActivationValue();
1027 if (DoRegression()) target = ev->
GetTarget( index );
1028 else if (DoMulticlass()) target = (ev->
GetClass() == index ? 1.0 : 0.0 );
1029 else target = GetDesiredOutput( ev );
1042 Timer timer( (fSteps>0?100:nEpochs), GetName() );
1049 fEstimatorHistTrain =
new TH1F(
"estimatorHistTrain",
"training estimator",
1050 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
1051 fEstimatorHistTest =
new TH1F(
"estimatorHistTest",
"test estimator",
1052 nbinTest,
Int_t(fTestRate/2), nbinTest*fTestRate+
Int_t(fTestRate/2) );
1054 if(fSamplingTraining || fSamplingTesting)
1055 Data()->InitSampling(1.0,1.0,fRandomSeed);
1057 if (fSteps > 0)
Log() << kINFO <<
"Inaccurate progress timing for MLP... " <<
Endl;
1065 for (
Int_t i = 0; i < nEpochs; i++) {
1067 if (fExitFromTraining)
break;
1068 fIPyCurrentIter = i;
1069 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
1070 if ((i+1)%fTestRate == 0 || (i == 0)) {
1071 if (fSamplingTraining) {
1073 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1074 Data()->CreateSampling();
1076 if (fSamplingTesting) {
1078 Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1079 Data()->CreateSampling();
1085 Data()->InitSampling(1.0,1.0);
1087 Data()->InitSampling(1.0,1.0);
1092 DecaySynapseWeights(i >= lateEpoch);
1095 if ((i+1)%fTestRate == 0) {
1098 if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1101 fEstimatorHistTrain->Fill( i+1, trainE );
1102 fEstimatorHistTest ->Fill( i+1, testE );
1105 if ((testE < GetCurrentValue()) || (GetCurrentValue()<1
e-100)) {
1108 Data()->EventResult( success );
1110 SetCurrentValue( testE );
1111 if (HasConverged()) {
1112 if (
Float_t(i)/nEpochs < fSamplingEpoch) {
1113 Int_t newEpoch =
Int_t(fSamplingEpoch*nEpochs);
1115 ResetConvergenceCounter();
1118 if (lateEpoch > i) lateEpoch = i;
1125 TString convText =
Form(
"<D^2> (train/test): %.4g/%.4g", trainE, testE );
1128 if (
Float_t(i)/nEpochs < fSamplingEpoch)
1129 progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1131 progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1146 Int_t nEvents = Data()->GetNEvents();
1150 for (
Int_t i = 0; i < nEvents; i++) index[i] = i;
1151 Shuffle(index, nEvents);
1154 for (
Int_t i = 0; i < nEvents; i++) {
1156 const Event * ev = GetEvent(index[i]);
1157 if ((ev->
GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1162 TrainOneEvent(index[i]);
1165 if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1166 AdjustSynapseWeights();
1167 if (fgPRINT_BATCH) {
1196 for (
Int_t i = 0; i <
n; i++) {
1197 j = (
Int_t) (frgen->Rndm() *
a);
1200 index[j] = index[i];
1213 Int_t numSynapses = fSynapses->GetEntriesFast();
1214 for (
Int_t i = 0; i < numSynapses; i++) {
1215 synapse = (
TSynapse*)fSynapses->At(i);
1238 if (
type == 0) desired = fOutput->GetMin();
1239 else desired = fOutput->GetMax();
1245 for (
UInt_t j = 0; j < GetNvar(); j++) {
1248 neuron = GetInputNeuron(j);
1252 ForceNetworkCalculations();
1253 UpdateNetwork(desired, eventWeight);
1267 const Event * ev = GetEvent(ievt);
1269 ForceNetworkInputs( ev );
1270 ForceNetworkCalculations();
1271 if (DoRegression()) UpdateNetwork( ev->
GetTargets(), eventWeight );
1272 if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1273 else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1281 return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin();
1290 Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1291 if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ;
1292 else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired);
1293 else Log() << kFATAL <<
"Estimator type unspecified!!" <<
Endl;
1294 error *= eventWeight;
1295 GetOutputNeuron()->SetError(error);
1296 CalculateNeuronDeltas();
1308 for (
UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1309 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1314 for (
UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1315 Double_t act = GetOutputNeuron(i)->GetActivationValue();
1318 error *= eventWeight;
1319 GetOutputNeuron(i)->SetError(error);
1323 CalculateNeuronDeltas();
1334 Int_t numLayers = fNetwork->GetEntriesFast();
1339 for (
Int_t i = numLayers-1; i >= 0; i--) {
1343 for (
Int_t j = 0; j < numNeurons; j++) {
1360 PrintMessage(
"Minimizing Estimator with GA");
1366 fGA_SC_factor = 0.95;
1370 std::vector<Interval*> ranges;
1372 Int_t numWeights = fSynapses->GetEntriesFast();
1373 for (
Int_t ivar=0; ivar< numWeights; ivar++) {
1374 ranges.push_back(
new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1380 Double_t estimator = CalculateEstimator();
1381 Log() << kINFO <<
"GA: estimator after optimization: " << estimator <<
Endl;
1389 return ComputeEstimator( parameters );
1398 Int_t numSynapses = fSynapses->GetEntriesFast();
1400 for (
Int_t i = 0; i < numSynapses; i++) {
1401 synapse = (
TSynapse*)fSynapses->At(i);
1404 if (fUseRegulator) UpdatePriors();
1406 Double_t estimator = CalculateEstimator();
1419 Int_t numLayers = fNetwork->GetEntriesFast();
1421 for (
Int_t i = 0; i < numLayers; i++) {
1425 for (
Int_t j = 0; j < numNeurons; j++) {
1441 Int_t numLayers = fNetwork->GetEntriesFast();
1443 for (
Int_t i = numLayers-1; i >= 0; i--) {
1447 for (
Int_t j = 0; j < numNeurons; j++) {
1460 Int_t nSynapses = fSynapses->GetEntriesFast();
1461 for (
Int_t i=0;i<nSynapses;i++) {
1463 fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->
GetWeight())*(synapse->
GetWeight());
1464 fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->
GetWeight()));
1473 GetApproxInvHessian(InvH);
1474 Int_t numSynapses=fSynapses->GetEntriesFast();
1475 Int_t numRegulators=fRegulators.size();
1478 std::vector<Int_t> nWDP(numRegulators);
1479 std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1480 for (
int i=0;i<numSynapses;i++) {
1482 Int_t idx=fRegulatorIdx[i];
1484 trace[idx]+=InvH[i][i];
1485 gamma+=1-fRegulators[idx]*InvH[i][i];
1488 if (fEstimator==kMSE) {
1494 for (
int i=0;i<numRegulators;i++)
1497 fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1498 if (fRegulators[i]<0) fRegulators[i]=0;
1499 Log()<<kDEBUG<<
"R"<<i<<
":"<<fRegulators[i]<<
"\t";
1504 Log()<<kDEBUG<<
"\n"<<
"trainE:"<<trainE<<
"\ttestE:"<<testE<<
"\tvariance:"<<variance<<
"\tgamma:"<<
gamma<<
Endl;
1512 Int_t numSynapses=fSynapses->GetEntriesFast();
1513 InvHessian.
ResizeTo( numSynapses, numSynapses );
1517 Int_t nEvents = GetNEvents();
1518 for (
Int_t i=0;i<nEvents;i++) {
1520 double outputValue=GetMvaValue();
1521 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1522 CalculateNeuronDeltas();
1523 for (
Int_t j = 0; j < numSynapses; j++){
1527 sens[j][0]=sensT[0][j]=synapses->
GetDelta();
1529 if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1530 else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1535 for (
Int_t i = 0; i < numSynapses; i++){
1536 InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1540 for (
Int_t i = 0; i < numSynapses; i++){
1541 InvHessian[i][i]+=1
e-6;
1556 if (!fCalculateErrors || errLower==0 || errUpper==0)
1559 Double_t MvaUpper,MvaLower,median,variance;
1560 Int_t numSynapses=fSynapses->GetEntriesFast();
1561 if (fInvHessian.GetNcols()!=numSynapses) {
1562 Log() << kWARNING <<
"inconsistent dimension " << fInvHessian.GetNcols() <<
" vs " << numSynapses <<
Endl;
1566 GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1568 CalculateNeuronDeltas();
1569 for (
Int_t i = 0; i < numSynapses; i++){
1576 TMatrixD sig=sensT*fInvHessian*sens;
1578 median=GetOutputNeuron()->GetValue();
1581 Log()<<kWARNING<<
"Negative variance!!! median=" << median <<
"\tvariance(sigma^2)=" << variance <<
Endl;
1584 variance=
sqrt(variance);
1587 MvaUpper=fOutput->Eval(median+variance);
1589 *errUpper=MvaUpper-MvaValue;
1592 MvaLower=fOutput->Eval(median-variance);
1594 *errLower=MvaValue-MvaLower;
1600#ifdef MethodMLP_UseMinuit__
1605void TMVA::MethodMLP::MinuitMinimize()
1607 fNumberOfWeights = fSynapses->GetEntriesFast();
1622 for (
Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1625 parName, w[ipar], 0.1, 0, 0 );
1629 tfitter->
SetFCN( &IFCN );
1668 ((MethodMLP*)GetThisPtr())->FCN( npars, grad,
f, fitPars, iflag );
1671TTHREAD_TLS(
Int_t) nc = 0;
1672TTHREAD_TLS(
double) minf = 1000000;
1677 for (
Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1683 f = CalculateEstimator();
1686 if (
f < minf) minf =
f;
1687 for (
Int_t ipar=0; ipar<fNumberOfWeights; ipar++)
Log() << kDEBUG << fitPars[ipar] <<
" ";
1689 Log() << kDEBUG <<
"***** New estimator: " <<
f <<
" min: " << minf <<
" --> ncalls: " << nc <<
Endl;
1723 Log() << col <<
"--- Short description:" << colres <<
Endl;
1725 Log() <<
"The MLP artificial neural network (ANN) is a traditional feed-" <<
Endl;
1726 Log() <<
"forward multilayer perceptron implementation. The MLP has a user-" <<
Endl;
1727 Log() <<
"defined hidden layer architecture, while the number of input (output)" <<
Endl;
1728 Log() <<
"nodes is determined by the input variables (output classes, i.e., " <<
Endl;
1729 Log() <<
"signal and one background). " <<
Endl;
1731 Log() << col <<
"--- Performance optimisation:" << colres <<
Endl;
1733 Log() <<
"Neural networks are stable and performing for a large variety of " <<
Endl;
1734 Log() <<
"linear and non-linear classification problems. However, in contrast" <<
Endl;
1735 Log() <<
"to (e.g.) boosted decision trees, the user is advised to reduce the " <<
Endl;
1736 Log() <<
"number of input variables that have only little discrimination power. " <<
Endl;
1738 Log() <<
"In the tests we have carried out so far, the MLP and ROOT networks" <<
Endl;
1739 Log() <<
"(TMlpANN, interfaced via TMVA) performed equally well, with however" <<
Endl;
1740 Log() <<
"a clear speed advantage for the MLP. The Clermont-Ferrand neural " <<
Endl;
1741 Log() <<
"net (CFMlpANN) exhibited worse classification performance in these" <<
Endl;
1742 Log() <<
"tests, which is partly due to the slow convergence of its training" <<
Endl;
1743 Log() <<
"(at least 10k training cycles are required to achieve approximately" <<
Endl;
1744 Log() <<
"competitive results)." <<
Endl;
1746 Log() << col <<
"Overtraining: " << colres
1747 <<
"only the TMlpANN performs an explicit separation of the" <<
Endl;
1748 Log() <<
"full training sample into independent training and validation samples." <<
Endl;
1749 Log() <<
"We have found that in most high-energy physics applications the " <<
Endl;
1750 Log() <<
"available degrees of freedom (training events) are sufficient to " <<
Endl;
1751 Log() <<
"constrain the weights of the relatively simple architectures required" <<
Endl;
1752 Log() <<
"to achieve good performance. Hence no overtraining should occur, and " <<
Endl;
1753 Log() <<
"the use of validation samples would only reduce the available training" <<
Endl;
1754 Log() <<
"information. However, if the performance on the training sample is " <<
Endl;
1755 Log() <<
"found to be significantly better than the one found with the inde-" <<
Endl;
1756 Log() <<
"pendent test sample, caution is needed. The results for these samples " <<
Endl;
1757 Log() <<
"are printed to standard output at the end of each training job." <<
Endl;
1759 Log() << col <<
"--- Performance tuning via configuration options:" << colres <<
Endl;
1761 Log() <<
"The hidden layer architecture for all ANNs is defined by the option" <<
Endl;
1762 Log() <<
"\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" <<
Endl;
1763 Log() <<
"neurons and the second N neurons (and so on), and where N is the number " <<
Endl;
1764 Log() <<
"of input variables. Excessive numbers of hidden layers should be avoided," <<
Endl;
1765 Log() <<
"in favour of more neurons in the first hidden layer." <<
Endl;
1767 Log() <<
"The number of cycles should be above 500. As said, if the number of" <<
Endl;
1768 Log() <<
"adjustable weights is small compared to the training sample size," <<
Endl;
1769 Log() <<
"using a large number of training samples should not lead to overtraining." <<
Endl;
#define REGISTER_METHOD(CLASS)
for example
TMatrixT< Double_t > TMatrixD
char * Form(const char *fmt,...)
The ROOT standard fitter based on TMinuit.
virtual Int_t SetParameter(Int_t ipar, const char *parname, Double_t value, Double_t verr, Double_t vlow, Double_t vhigh)
set initial values for a parameter
virtual Int_t ExecuteCommand(const char *command, Double_t *args, Int_t nargs)
Execute a fitter command; command : command string args : list of nargs command arguments.
virtual void SetFCN(void(*fcn)(Int_t &, Double_t *, Double_t &f, Double_t *, Int_t))
Specify the address of the fitting algorithm.
1-D histogram with a float per channel (see TH1 documentation)}
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Bool_t WriteOptionsReference() const
Class that contains all the data information.
std::vector< Float_t > & GetTargets()
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Float_t GetTarget(UInt_t itgt) const
Base class for TMVA fitters.
Double_t Run()
estimator function interface for fitting
Fitter using a Genetic Algorithm.
The TMVA::Interval Class.
Base class for all TMVA methods using artificial neural networks.
virtual void ProcessOptions()
do nothing specific at this moment
virtual void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
virtual Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
Multilayer Perceptron class built off of MethodANNBase.
Bool_t LineSearch(TMatrixD &Dir, std::vector< Double_t > &Buffer, Double_t *dError=0)
void GetHelpMessage() const
get help message text
void BackPropagationMinimize(Int_t nEpochs)
minimize estimator / train network with back propagation algorithm
Double_t GetMSEErr(const Event *ev, UInt_t index=0)
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
get the mva value generated by the NN
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void AdjustSynapseWeights()
just adjust the synapse weights (should be called in batch mode)
void SteepestDir(TMatrixD &Dir)
void TrainOneEpoch()
train network over a single epoch/cycle of events
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
MLP can handle classification with 2 classes and regression with one regression-target.
Bool_t GetHessian(TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta)
Double_t ComputeEstimator(std::vector< Double_t > ¶meters)
this function is called by GeneticANN for GA optimization
void InitializeLearningRates()
initialize learning rates of synapses, used only by back propagation
void CalculateNeuronDeltas()
have each neuron calculate its delta by back propagation
Double_t DerivDir(TMatrixD &Dir)
Double_t GetCEErr(const Event *ev, UInt_t index=0)
virtual ~MethodMLP()
destructor nothing to be done
void SetDir(TMatrixD &Hessian, TMatrixD &Dir)
void Shuffle(Int_t *index, Int_t n)
Input:
void SimulateEvent(const Event *ev)
void SetDirWeights(std::vector< Double_t > &Origin, TMatrixD &Dir, Double_t alpha)
void SetGammaDelta(TMatrixD &Gamma, TMatrixD &Delta, std::vector< Double_t > &Buffer)
Double_t EstimatorFunction(std::vector< Double_t > ¶meters)
interface to the estimate
void GetApproxInvHessian(TMatrixD &InvHessian, bool regulate=true)
void BFGSMinimize(Int_t nEpochs)
train network with BFGS algorithm
void UpdateSynapses()
update synapse error fields and adjust the weights (if in sequential mode)
void Init()
default initializations
void ProcessOptions()
process user options
void TrainOneEvent(Int_t ievt)
train network over a single event this uses the new event model
Double_t GetDesiredOutput(const Event *ev)
get the desired output of this event
void GeneticMinimize()
create genetics class similar to GeneticCut give it vector of parameter ranges (parameters = weights)...
void DecaySynapseWeights(Bool_t lateEpoch)
decay synapse weights in last 10 epochs, lower learning rate even more to find a good minimum
void TrainOneEventFast(Int_t ievt, Float_t *&branchVar, Int_t &type)
fast per-event training
void UpdateNetwork(Double_t desired, Double_t eventWeight=1.0)
update the network based on how closely the output matched the desired output
MethodMLP(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor
void DeclareOptions()
define the options (their key words) that can be set in the option string
Double_t CalculateEstimator(Types::ETreeType treeType=Types::kTraining, Int_t iEpoch=-1)
calculate the estimator that training is attempting to minimize
Neuron class used by TMVA artificial neural network methods.
void AdjustSynapseWeights()
adjust the pre-synapses' weights for each neuron (input neuron has no pre-synapse) this method should...
void ForceValue(Double_t value)
force the value, typically for input and bias neurons
void UpdateSynapsesSequential()
update the pre-synapses for each neuron (input neuron has no pre-synapse) this method should only be ...
void UpdateSynapsesBatch()
update and adjust the pre-synapses for each neuron (input neuron has no pre-synapse) this method shou...
void CalculateDelta()
calculate error field
Synapse class used by TMVA artificial neural network methods.
void SetWeight(Double_t weight)
set synapse weight
void SetDEDw(Double_t DEDw)
void SetLearningRate(Double_t rate)
void DecayLearningRate(Double_t rate)
void CalculateDelta()
calculate/adjust the error field for this synapse
Timing information for training and evaluation of MVA methods.
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Singleton class for Global types used by TMVA.
virtual TMatrixTBase< Element > & UnitMatrix()
Make a unit matrix (matrix need not be a square one).
virtual TMatrixTBase< Element > & ResizeTo(Int_t nrows, Int_t ncols, Int_t=-1)
Set size of the matrix to nrows x ncols New dynamic elements are created, the overlapping part of the...
TMatrixT< Element > & Invert(Double_t *det=0)
Invert the matrix and calculate its determinant.
TMatrixT< Element > & Transpose(const TMatrixT< Element > &source)
Transpose matrix source.
Int_t GetEntriesFast() const
TObject * At(Int_t idx) const
This is a simple weighted bidirectional connection between two neurons.
void SetWeight(Double_t w)
Sets the weight of the synapse.
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
MsgLogger & Endl(MsgLogger &ml)
Double_t Sqrt(Double_t x)
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
static void output(int code)