#ifndef TMVA_NEURAL_NET_I
#define TMVA_NEURAL_NET_I

#pragma GCC diagnostic ignored "-Wunused-variable"

template <typename T>
T uniformFromTo (T from, T to)
{
    return from + (rand ()* (to - from)/RAND_MAX);
}
template <typename Container, typename T>
void uniformDouble (Container& container, T maxValue)
{
    for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
    {
        // ... (fill each element with a uniform random value in [-maxValue, maxValue])
    }
}
static std::shared_ptr<std::function<double(double)>> ZeroFnc = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 0; });

static std::shared_ptr<std::function<double(double)>> Sigmoid = std::make_shared<std::function<double(double)>> ([](double value){ value = std::max (-100.0, std::min (100.0, value)); return 1.0/(1.0 + std::exp (-value)); });
static std::shared_ptr<std::function<double(double)>> InvSigmoid = std::make_shared<std::function<double(double)>> ([](double value){ double s = (*Sigmoid.get ()) (value); return s*(1.0-s); });

static std::shared_ptr<std::function<double(double)>> Tanh = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value); });
static std::shared_ptr<std::function<double(double)>> InvTanh = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 - std::pow (value, 2.0); });

static std::shared_ptr<std::function<double(double)>> Linear = std::make_shared<std::function<double(double)>> ([](double value){ return value; });
static std::shared_ptr<std::function<double(double)>> InvLinear = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 1.0; });

static std::shared_ptr<std::function<double(double)>> SymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? value-margin : value < -margin ? value+margin : 0; });
static std::shared_ptr<std::function<double(double)>> InvSymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? 1.0 : value < -margin ? 1.0 : 0; });

static std::shared_ptr<std::function<double(double)>> ReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? value-margin : 0; });
static std::shared_ptr<std::function<double(double)>> InvReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? 1.0 : 0; });

static std::shared_ptr<std::function<double(double)>> SoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return std::log (1.0 + std::exp (value)); });
static std::shared_ptr<std::function<double(double)>> InvSoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 / (1.0 + std::exp (-value)); });

static std::shared_ptr<std::function<double(double)>> TanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value-0.3); });
static std::shared_ptr<std::function<double(double)>> InvTanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return 0.3 + (1.0 - std::pow (value, 2.0)); });

static std::shared_ptr<std::function<double(double)>> SoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return value / (1.0 + fabs (value)); });
static std::shared_ptr<std::function<double(double)>> InvSoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return std::pow ((1.0 - fabs (value)), 2.0); });

static std::shared_ptr<std::function<double(double)>> Gauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return exp (-std::pow (value*s, 2.0)); });
static std::shared_ptr<std::function<double(double)>> InvGauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return -2.0 * value * s*s * (*Gauss.get ()) (value); });

static std::shared_ptr<std::function<double(double)>> GaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return 1.0 - exp (-std::pow (value*s, 2.0)); });
static std::shared_ptr<std::function<double(double)>> InvGaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return +2.0 * value * s*s * (*GaussComplement.get ()) (value); });
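// Illustrative usage sketch (added for clarity, not part of the original file). The
// activation functions above are stored behind shared_ptr<std::function<double(double)>>
// so that layers can share them cheaply; to evaluate one, dereference the shared_ptr and
// call the wrapped std::function. The macro guard below is hypothetical and only keeps
// the example out of normal builds.
#ifdef TMVA_NEURAL_NET_USAGE_EXAMPLE
inline void activationUsageExample ()
{
   double x  = 0.5;
   double y  = (*Sigmoid.get ()) (x);     // forward activation value
   double dy = (*InvSigmoid.get ()) (x);  // derivative-like factor used during back-propagation
   (void) y; (void) dy;
}
#endif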
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
    {
        for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
    {
        for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
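// Illustrative sketch (added, not part of the original file): applyWeights accumulates
// target[j] += source[i] * weight for every (source, target) pair, advancing the weight
// iterator once per target, i.e. the weights are laid out source-major. The example feeds
// a 2-node source layer into a 3-node target layer without drop-out; the macro guard and
// the helper function are hypothetical, and <vector> is assumed to be available.
#ifdef TMVA_NEURAL_NET_USAGE_EXAMPLE
inline void applyWeightsExample ()
{
   std::vector<double> source  = {1.0, 2.0};
   std::vector<double> weights = {0.1, 0.2, 0.3,   // weights leaving source node 0
                                  0.4, 0.5, 0.6};  // weights leaving source node 1
   std::vector<double> target (3, 0.0);
   bool dummyDrop = true;                          // unused because HasDropOut = false
   applyWeights<false> (source.begin (), source.end (),
                        weights.begin (),
                        target.begin (), target.end (),
                        &dummyDrop);
   // target is now {1*0.1 + 2*0.4, 1*0.2 + 2*0.5, 1*0.3 + 2*0.6} = {0.9, 1.2, 1.5}
}
#endif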
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);

        ++itValue;
    }
}
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        (*itGradient) = (*invFnc.get ()) (value);

        ++itValue; ++itGradient;
    }
}
// update the gradients
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
            ++itTargetDelta; ++itTargetGradient; ++itGradient;
        }
        ++itSource;
    }
}
// compute the regularization (L1, L2)
template <EnumRegularization Regularization>
inline double computeRegularization (double weight, const double& factorWeightDecay);
// ... (generic case elided)

// L1 regularization
template <>
inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
{
    return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
}

// L2 regularization
template <>
inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
{
    return factorWeightDecay * weight;
}
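// Illustrative sketch (added, not part of the original file): the two specializations
// return the gradient contribution of the regularization term for a single weight. L1
// contributes a constant magnitude carrying the sign of the weight, L2 a term
// proportional to the weight. The macro guard and the helper function are hypothetical.
#ifdef TMVA_NEURAL_NET_USAGE_EXAMPLE
inline void regularizationExample ()
{
   double w = -0.4, decay = 0.01;
   double l1 = computeRegularization<EnumRegularization::L1> (w, decay); // copysign (0.01, -0.4) = -0.01
   double l2 = computeRegularization<EnumRegularization::L2> (w, decay); // 0.01 * -0.4          = -0.004
   (void) l1; (void) l2;
}
#endif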
// update the gradients, including the regularization term
template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight, double weightDecay)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight, weightDecay);
            ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
        }
        ++itSource;
    }
}
#define USELOCALWEIGHTS 1

// operator to call the steepest gradient descent algorithm
template <typename Function, typename Weights, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
    size_t numWeights = weights.size ();
    // ...
    size_t currentRepetition = 0;
    // ...

    // add the gradients of the previous step onto the local weights
    for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
    {
        // ...
        (*itLocWeight) += (*itPrevG);
    }

    // combine the current gradients with the previous ones
    double maxGrad = 0.0;
    for (; itG != itGEnd; ++itG, ++itPrevG)
    {
        double currGrad = (*itG);
        double prevGrad = (*itPrevG);
        // ...
        currGrad += prevGrad;
        // ...
        (*itPrevG) = currGrad;
        // ...
    }

    // ... (if the gradients grow too large, the learning rate is reduced)
    std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
    std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
    {
        // ...
    });
    // ...

    // apply the gradients onto the weights
    auto itW = std::begin (weights);
    // ...
}
// sum-of-squares error function
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
{
    double errorSum = 0.0;

    // output - truth
    ItTruth itTruth = itTruthBegin;
    bool hasDeltas = (itDelta != itDeltaEnd);
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        // ...
        double output = (*itOutput);
        double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error*error * patternWeight;
    }

    return errorSum;
}
// cross entropy error function
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;
        // ...
        truth = truth < 0.5 ? 0.1 : 0.9;  // soft targets
        if (hasDeltas)
        {
            double delta = probability - truth;
            (*itDelta) = delta*patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0)       // protection against log (0)
        {
            // ...
        }
        else if (probability == 1)  // protection against log (0)
        {
            // ...
        }
        else
            error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability));
        errorSum += error * patternWeight;
    }

    return errorSum;
}
// soft-max-cross-entropy error function (for mutually exclusive classes)
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);
    // ...
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        // ...
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        double error (0);
        // ...
        error += truth * log (probability);
        errorSum += error;
    }

    return -errorSum * patternWeight;
}
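// Illustrative sketch (added, not part of the original file): computing the sum-of-squares
// error and the per-node deltas for one pattern. The identity inverse activation (InvLinear)
// is used, so the deltas are simply (output - truth) * patternWeight; the macro guard and
// the helper function are hypothetical.
#ifdef TMVA_NEURAL_NET_USAGE_EXAMPLE
inline void sumOfSquaresExample ()
{
   std::vector<double> output = {0.8, 0.3};
   std::vector<double> truth  = {1.0, 0.0};
   std::vector<double> deltas (2, 0.0);
   double error = sumOfSquares (output.begin (), output.end (),
                                truth.begin (), truth.end (),
                                deltas.begin (), deltas.end (),
                                InvLinear, /*patternWeight*/ 1.0);
   // error = (0.8-1.0)^2 + (0.3-0.0)^2 = 0.13,  deltas = {-0.2, 0.3}
   (void) error;
}
#endif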
// compute the weight decay for regularization (L1 or L2)
template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
{
    if (eRegularization == EnumRegularization::L1)
    {
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += std::fabs (weight);
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else if (eRegularization == EnumRegularization::L2)
    {
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += weight*weight;
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else
        return error;
}
// apply the weights (and functions) in forward direction of the DNN
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                            currLayerData.weightsBegin (),
                            currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                            prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                             currLayerData.weightsBegin (),
                             currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                             &dummy); // dummy drop-out iterator (not used)
    }
}
// backward application of the weights (back-propagation of the error)
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                     currLayerData.weightsBegin (),
                                     prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                     prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      &dummy); // dummy drop-out iterator (not used)
    }
}
// update the gradients of the weights feeding one layer
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
{
    // accumulate the gradients; add the regularization term if weight decay is switched on
    if (factorWeightDecay != 0.0)
    {
        if (regularization == EnumRegularization::L1)
        {
            update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else if (regularization == EnumRegularization::L2)
        {
            update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else
        {
            update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                    currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                    currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
        }
    }
    else
    {
        update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
    }
}
// set the drop-out configuration: rescale the weights layer by layer
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;
    ++itDrop;

    for (auto& layer : layers ())
    {
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        // ... (combine the keep-probabilities; invert the factor if "inverse" is requested)

        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;
            // ... (scale the weight by the factor and advance itWeight)
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}
// start the training
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer, Settings& settings)
{
    // ...
    if (fIPyMaxIter) *fIPyMaxIter = 100;
    // ...
    settings.create ("trainErrors", 100, 0, 100, 100, 0, 1);
    settings.create ("testErrors", 100, 0, 100, 100, 0, 1);
    // ...
    size_t cycleCount = 0;
    size_t testCycleCount = 0;
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;
    // ...
    DropContainer dropContainer;
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;

    // ... training loop (repeated until convergence or the maximum number of cycles) ...

    // renew the drop-out pattern periodically
    size_t dropIndex = 0;
    if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
    {
        // refill the drop-out container for the input layer and each following layer
        dropContainer.clear ();
        size_t _numNodes = inputSize ();
        double dropFraction = 0.0;
        dropFraction = dropFractions.at (dropIndex);
        // ...
        fillDropContainer (dropContainer, dropFraction, _numNodes);
        for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
        {
            auto& layer = *itLayer;
            _numNodes = layer.numNodes ();
            // take the layer-specific drop fraction if one is given
            if (dropFractions.size () > dropIndex)
                dropFraction = dropFractions.at (dropIndex);
            // ...
            fillDropContainer (dropContainer, dropFraction, _numNodes);
        }
        isWeightsForDrop = true;
    }

    // execute one training cycle over all training patterns
    trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);

    // test the net periodically on the test patterns
    bool hasConverged = false;
    // ...
    if (isWeightsForDrop)
    {
        // rescale the weights so that they can be used without drop-out
        dropOutWeightFactor (weights, dropFractions);
        isWeightsForDrop = false;
    }
783 size_t numThreads = std::thread::hardware_concurrency ();
784 size_t patternPerThread = testPattern.size () / numThreads;
785 std::vector<Batch> batches;
786 auto itPat = testPattern.begin ();
788 for (
size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
790 batches.push_back (
Batch (itPat, itPat + patternPerThread));
791 itPat += patternPerThread;
793 if (itPat != testPattern.end ())
794 batches.push_back (
Batch (itPat, testPattern.end ()));
796 std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
797 for (
auto& batch : batches)
801 std::async (std::launch::async, [&]()
803 std::vector<double> localOutput;
805 double testBatchError = (*this) (passThrough, weights,
ModeOutput::FETCH, localOutput);
806 return std::make_tuple (testBatchError, localOutput);
811 auto itBatch = batches.begin ();
812 for (
auto&
f : futures)
814 std::tuple<double,std::vector<double>>
result =
f.get ();
815 testError += std::get<0>(
result) / batches.size ();
821 auto it = (*itBatch).begin ();
822 for (
double out : output)
824 settings.
testSample (0, out, (*it).output ().at (0), (*it).weight ());
834 std::vector<double>
output;
840 Batch batch (begin (testPattern), end (testPattern));
845 auto it = batch.
begin ();
846 for (
double out : output)
848 settings.
testSample (0, out, (*it).output ().at (0), (*it).weight ());
853 testError += testPatternError;
    // ...
    if (!hasConverged && !isWeightsForDrop)
    {
        // switch the weights back to the drop-out configuration (inverse rescaling)
        dropOutWeightFactor (weights, dropFractions, true);
        isWeightsForDrop = true;
    }
    // ...
    ++dropOutChangeCount;

    static double x = -1.0;
    // ...
    settings.addPoint ("trainErrors", cycleCount, trainError);
    settings.addPoint ("testErrors", cycleCount, testError);
    settings.plot ("trainErrors", "C", 1, kBlue);
    // ...
    if (fInteractive)
    {
        fInteractive->AddPoint (cycleCount, trainError, testError);
        if (*fExitFromTraining)
            break;
    }
    // ...
    if ((int)cycleCount % 10 == 0)
    {
        TString convText = Form ("(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                 trainError,
                                 testError,
                                 (int)cycleCount,
                                 (int)settings.convergenceCount (),
                                 (int)settings.maxConvergenceCount ());
        // ...
        settings.cycle (progress, convText);
    }
    // ... end of the training loop

    TString convText = Form ("(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
    // ...
    settings.cycle (progress, convText);
    // ...
    return testError;
}
// executes one training cycle
template <typename Iterator, typename Minimizer>
double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                        Iterator itPatternBegin, Iterator itPatternEnd,
                        Settings& settings, DropContainer& dropContainer)
{
    double error = 0.0;
    size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
    size_t numBatches = numPattern/settings.batchSize ();
    size_t numBatches_stored = numBatches;

    std::random_shuffle (itPatternBegin, itPatternEnd);
    Iterator itPatternBatchBegin = itPatternBegin;
    Iterator itPatternBatchEnd = itPatternBatchBegin;

    // create the batches
    std::vector<Batch> batches;
    while (numBatches > 0)
    {
        std::advance (itPatternBatchEnd, settings.batchSize ());
        batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
        itPatternBatchBegin = itPatternBatchEnd;
        --numBatches;
    }

    // the remaining patterns form the last batch
    if (itPatternBatchEnd != itPatternEnd)
        batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));

    if (settings.useMultithreading ())
    {
        // split the batches into bunches, one per thread
        size_t numThreads = std::thread::hardware_concurrency ();
        size_t batchesPerThread = batches.size () / numThreads;
        typedef std::vector<Batch>::iterator batch_iterator;
        std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
        batch_iterator itBatchBegin = std::begin (batches);
        batch_iterator itBatchCurrEnd = std::begin (batches);
        batch_iterator itBatchEnd = std::end (batches);
        for (size_t iT = 0; iT < numThreads; ++iT)
        {
            if (iT == numThreads-1)
                itBatchCurrEnd = itBatchEnd;
            else
                std::advance (itBatchCurrEnd, batchesPerThread);
            batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
            itBatchBegin = itBatchCurrEnd;
        }

        // one future per bunch of batches
        std::vector<std::future<double>> futures;
        for (auto& batchRange : batchVec)
        {
            // ...
            futures.push_back (
                std::async (std::launch::async, [&]()
                {
                    double localError = 0.0;
                    for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
                    {
                        Batch& batch = *it;
                        pass_through_type settingsAndBatch (settings, batch, dropContainer);
                        Minimizer minimizerClone (minimizer);
                        localError += minimizerClone ((*this), weights, settingsAndBatch);
                    }
                    return localError;
                })
            );
        }

        for (auto& f : futures)
            error += f.get ();
    }
    else
    {
        for (auto& batch : batches)
        {
            std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
            error += minimizer ((*this), weights, settingsAndBatch);
        }
    }

    numBatches_stored = std::max (numBatches_stored, size_t(1));
    error /= numBatches_stored;
    // ...

    return error;
}
// compute the net with the given input and the given weights
template <typename Weights>
std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
{
    std::vector<LayerData> layerData;
    layerData.reserve (m_layers.size ()+1);
    auto itWeight = begin (weights);
    auto itInputBegin = begin (input);
    auto itInputEnd = end (input);
    layerData.push_back (LayerData (itInputBegin, itInputEnd));
    size_t numNodesPrev = input.size ();

    // prepare one LayerData per layer; the weight iterator is advanced layer by layer
    for (auto& layer: m_layers)
    {
        layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                        layer.activationFunction (),
                                        layer.modeOutputValues ()));
        size_t _numWeights = layer.numWeights (numNodesPrev);
        itWeight += _numWeights;
        numNodesPrev = layer.numNodes ();
    }

    // forward propagation
    forwardPattern (m_layers, layerData);

    // fetch the output of the last layer
    std::vector<double> output;
    fetchOutput (layerData.back (), output);
    return output;
}
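// Illustrative sketch (added, not part of the original file): evaluating a configured and
// trained net on a single input pattern. `net`, `weights` and the input values are
// placeholders; they are assumed to have been set up elsewhere (e.g. via Net::train).
#ifdef TMVA_NEURAL_NET_USAGE_EXAMPLE
inline std::vector<double> computeExample (const Net& net, const std::vector<double>& weights)
{
   std::vector<double> input = {0.5, -1.2, 3.0};   // one pattern with inputSize() values
   return net.compute (input, weights);            // forward pass only, no gradients
}
#endif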
// execute computation of the DNN for one mini-batch (used by the minimizer); no gradients
template <typename Weights, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
{
    // forward only: trainFromLayer = 10000 lies beyond the last layer, so no back-propagation
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights),
                                     std::begin (nothing), std::end (nothing), 10000, nothing, false);
    return error;
}
// execute computation of the DNN for one mini-batch and fetch the output; no gradients
template <typename Weights, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights,
                        ModeOutput /*eFetch*/, OutContainer& outputContainer) const
{
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights),
                                     std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
    return error;
}
// execute computation of the DNN for one mini-batch, including the computation of the gradients
template <typename Weights, typename Gradients, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
{
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients), 0, nothing, false);
    return error;
}
// execute computation of the DNN for one mini-batch, including gradients and output fetching
template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients,
                        ModeOutput eFetch, OutContainer& outputContainer) const
{
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients), 0, outputContainer, true);
    return error;
}
// prepare the layer data (one LayerData per layer and per pattern of the batch)
template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
                                                           Batch& batch,
                                                           const DropContainer& dropContainer,
                                                           ItWeight itWeightBegin,
                                                           ItWeight itWeightEnd,
                                                           ItGradient itGradientBegin,
                                                           ItGradient itGradientEnd,
                                                           size_t& totalNumWeights) const
{
    // ...
    bool usesDropOut = !dropContainer.empty ();
    const_dropout_iterator itDropOut;
    if (usesDropOut)
        itDropOut = std::begin (dropContainer);

    if (_layers.empty ())
        throw std::string ("no layers in this net");

    // ...
    totalNumWeights = 0;
    size_t totalNumNodes = 0;
    std::vector<std::vector<LayerData>> layerPatternData;
    layerPatternData.reserve (_layers.size ()+1);
    ItWeight itWeight = itWeightBegin;
    ItGradient itGradient = itGradientBegin;
    size_t numNodesPrev = inputSize ();
    // ...

    // input layer: one LayerData per pattern
    layerPatternData.push_back (std::vector<LayerData>());
    for (const Pattern& _pattern : batch)
    {
        std::vector<LayerData>& layerData = layerPatternData.back ();
        layerData.push_back (LayerData (numNodesPrev));
        // ...
        itInputBegin = _pattern.beginInput ();
        itInputEnd = _pattern.endInput ();
        layerData.back ().setInput (itInputBegin, itInputEnd);

        if (usesDropOut)
            layerData.back ().setDropOut (itDropOut);
    }
    // ...
    if (usesDropOut)
        itDropOut += _layers.back ().numNodes ();

    // hidden and output layers
    for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
    {
        bool isOutputLayer = (itLayer+1 == itLayerEnd);
        bool isFirstHiddenLayer = (itLayer == begin (_layers));

        auto& layer = *itLayer;
        layerPatternData.push_back (std::vector<LayerData>());
        // one LayerData per pattern
        for (const Pattern& _pattern : batch)
        {
            std::vector<LayerData>& layerData = layerPatternData.back ();
            // ...
            if (itGradientBegin == itGradientEnd) // no gradients requested: forward-only layer data
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                                layer.activationFunction (),
                                                layer.modeOutputValues ()));
            }
            else                                  // training: layer data with gradients
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
                                                layer.activationFunction (),
                                                layer.inverseActivationFunction (),
                                                layer.modeOutputValues ()));
            }

            if (usesDropOut)
                layerData.back ().setDropOut (itDropOut);
        }
        // ...
        if (usesDropOut)
            itDropOut += layer.numNodes ();

        size_t _numWeights = layer.numWeights (numNodesPrev);
        totalNumWeights += _numWeights;
        itWeight += _numWeights;
        itGradient += _numWeights;
        numNodesPrev = layer.numNodes ();
        totalNumNodes += numNodesPrev;
    }
    assert (totalNumWeights > 0);
    return layerPatternData;
}
// forward propagation of a single pattern through the layers
template <typename LayerContainer>
void Net::forwardPattern (const LayerContainer& _layers,
                          std::vector<LayerData>& layerData) const
{
    size_t idxLayer = 0, idxLayerEnd = _layers.size ();
    size_t cumulativeNodeCount = 0;
    for (; idxLayer < idxLayerEnd; ++idxLayer)
    {
        LayerData& prevLayerData = layerData.at (idxLayer);
        LayerData& currLayerData = layerData.at (idxLayer+1);

        forward (prevLayerData, currLayerData);
        // ... (apply the activation functions of this layer)
    }
}
// forward propagation of a whole batch through the layers
template <typename LayerContainer, typename LayerPatternContainer>
void Net::forwardBatch (const LayerContainer& _layers,
                        LayerPatternContainer& layerPatternData,
                        std::vector<double>& valuesMean,
                        std::vector<double>& valuesStdDev,
                        size_t trainFromLayer) const
{
    valuesMean.clear ();
    valuesStdDev.clear ();

    // forward propagate layer by layer
    size_t cumulativeNodeCount = 0;
    for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
    {
        bool doTraining = idxLayer >= trainFromLayer;

        // get the layer data (one entry per pattern)
        std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
        std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

        size_t numPattern = prevLayerPatternData.size ();
        size_t numNodesLayer = _layers.at (idxLayer).numNodes ();

        std::vector<MeanVariance> means (numNodesLayer);
        // propagate each pattern from the previous to the current layer
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);
            // ...
            forward (prevLayerData, currLayerData);
        }

        // apply the activation functions (and compute the gradients when training)
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            // ...
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);
            // ...
        }

        // ...
        cumulativeNodeCount += numNodesLayer;
    }
}
// fetch the output of the last layer of one pattern
template <typename OutputContainer>
void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
{
    // depending on the output mode of the last layer, either the raw node values ...
    // ...
    outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
    // ...
    // ... or the probabilities computed from them are appended to the output container
    auto prob = lastLayerData.probabilities ();
    outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ());
    // ...
}
template <typename OutputContainer>
void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
{
    for (const LayerData& lastLayerData : lastLayerPatternData)
        fetchOutput (lastLayerData, outputContainer);
}
// compute the error of the batch on the last layer
template <typename ItWeight>
std::tuple<double,double> Net::computeError (const Settings& settings,
                                             std::vector<LayerData>& lastLayerData,
                                             Batch& batch,
                                             ItWeight itWeightBegin,
                                             ItWeight itWeightEnd) const
{
    typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
    typename std::vector<LayerData>::iterator itLayerDataEnd = lastLayerData.end ();

    typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
    typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();

    double sumWeights (0.0);
    double sumError (0.0);

    // accumulate the error over the patterns of the batch
    size_t idxPattern = 0;
    for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
    {
        // ...
        LayerData& layerData = (*itLayerData);
        const Pattern& _pattern = (*itPattern);
        double error = errorFunction (layerData, _pattern.output (),
                                      itWeightBegin, itWeightEnd,
                                      _pattern.weight (), settings.factorWeightDecay (),
                                      settings.regularization ());
        // ... (accumulate sumError and sumWeights)
    }
    return std::make_tuple (sumError, sumWeights);
}
// back-propagation through the layers (in reverse order)
template <typename Settings>
void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                         const Settings& settings,
                         size_t trainFromLayer,
                         size_t totalNumWeights) const
{
    bool doTraining = layerPatternData.size () > trainFromLayer;
    if (!doTraining)
        return;

    // walk backwards through the layers
    size_t idxLayer = layerPatternData.size ();
    for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
         itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
    {
        --idxLayer;
        if (idxLayer <= trainFromLayer) // no training requested for the remaining layers
            break;

        std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
        std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

        // back-propagate each pattern of the batch
        size_t idxPattern = 0;
        for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                 itPrevLayerData = begin (prevLayerDataColl), itPrevLayerDataEnd = end (prevLayerDataColl);
             itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
        {
            LayerData& currLayerData = (*itCurrLayerData);
            LayerData& prevLayerData = *(itPrevLayerData);

            backward (prevLayerData, currLayerData);
            // ... (accumulate the weight gradients for this layer)
        }
    }
}
// main NN computation function
template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                              ItWeight itWeightBegin, ItWeight itWeightEnd,
                              ItGradient itGradientBegin, ItGradient itGradientEnd,
                              size_t trainFromLayer,
                              OutContainer& outputContainer, bool doFetchOutput) const
{
    Settings& settings = std::get<0>(settingsAndBatch);
    Batch& batch = std::get<1>(settingsAndBatch);
    DropContainer& dropContainer = std::get<2>(settingsAndBatch);

    double sumError = 0.0;
    double sumWeights = 0.0;
    // ...

    // prepare the layer data (one entry per layer and per pattern)
    size_t totalNumWeights (0);
    std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                             batch,
                                                                             dropContainer,
                                                                             itWeightBegin, itWeightEnd,
                                                                             itGradientBegin, itGradientEnd,
                                                                             totalNumWeights);
    // ...
    // forward propagation
    std::vector<double> valuesMean;
    std::vector<double> valuesStdDev;
    forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);

    // fetch the output if requested
    if (doFetchOutput)
    {
        fetchOutput (layerPatternData.back (), outputContainer);
    }

    // error computation on the output layer
    std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);

    // back propagation
    backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);

    // normalize the gradients by the batch size
    double batchSize = std::distance (std::begin (batch), std::end (batch));
    for (auto it = itGradientBegin; it != itGradientEnd; ++it)
        (*it) /= batchSize;

    sumError /= sumWeights;
    return sumError;
}
// initialize the weights with the given strategy
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
{
    if (eInitStrategy == WeightInitializationStrategy::XAVIER)
    {
        // Xavier-style initialization: gaussian with standard deviation sqrt(2/nIn)
        int numInput = inputSize ();
        // ...
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double stdDev = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = gaussDouble (0.0, stdDev);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
    {
        // Xavier-style initialization: uniform in [-sqrt(2/nIn), +sqrt(2/nIn)]
        int numInput = inputSize ();
        // ...
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double minVal = -sqrt (2.0/nIn);
            double maxVal =  sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = uniformDouble (minVal, maxVal);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::TEST)
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer: layers ())
        {
            // ...
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
    {
        int numInput = inputSize ();
        // ...
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }
}
// computes the error of the DNN
template <typename Container, typename ItWeight>
double Net::errorFunction (LayerData& layerData,
                           Container truth,
                           ItWeight itWeight,
                           ItWeight itWeightEnd,
                           double patternWeight,
                           double factorWeightDecay,
                           EnumRegularization eRegularization) const
{
    double error (0);
    switch (m_eErrorFunction)
    {
    // ... (sum-of-squares case elided)
    case ModeErrorFunction::CROSSENTROPY:
    {
        // ...
        std::vector<double> probabilities = layerData.probabilities ();
        error = crossEntropy (begin (probabilities), end (probabilities),
                              begin (truth), end (truth),
                              layerData.deltasBegin (), layerData.deltasEnd (),
                              layerData.inverseActivationFunction (),
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
    {
        std::cout << "softmax." << std::endl;
        // ...
        std::vector<double> probabilities = layerData.probabilities ();
        error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                     begin (truth), end (truth),
                                     layerData.deltasBegin (), layerData.deltasEnd (),
                                     layerData.inverseActivationFunction (),
                                     patternWeight);
        break;
    }
    }
    // ...
    error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
    return error;
}

#endif // TMVA_NEURAL_NET_I