#ifndef TMVA_NEURAL_NET_I
#define TMVA_NEURAL_NET_I

#ifndef TMVA_NEURAL_NET
#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
#endif // TMVA_NEURAL_NET

#pragma GCC diagnostic ignored "-Wunused-variable"
template <typename T>
T uniformFromTo (T from, T to)
{
    return from + (rand () * (to - from) / RAND_MAX);
}


template <typename Container, typename T>
void uniformDouble (Container& container, T maxValue)
{
    for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
    {
        // ... fill each element with a random value bounded by maxValue ...
    }
}
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
    {
        for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
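
// Usage sketch for applyWeights: for each target node it accumulates the weighted sum
// over all source nodes. With HasDropOut == false the drop-out iterator is neither
// dereferenced nor advanced, so a null pointer can stand in for it. The vectors below
// are hypothetical example data, not taken from TMVA.
//
//   std::vector<double> source  = {0.5, -1.0};                     // 2 source nodes
//   std::vector<double> weights = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6};  // 2 x 3 weights
//   std::vector<double> target  (3, 0.0);                          // 3 target nodes
//   applyWeights<false> (begin (source), end (source), begin (weights),
//                        begin (target), end (target), (bool*)nullptr);
//   // afterwards: target[j] == sum_i source[i] * weights[i*3 + j]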
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
    {
        for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);

        ++itValue;
    }
}
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        (*itGradient) = (*invFnc.get ()) (value);

        ++itValue; ++itGradient;
    }
}
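
// Usage sketch for applyFunctions: transform a layer's node values in place and store
// the derivative needed for back-propagation. Tanh and InvTanh are the activation
// function objects declared in NeuralNet.h; the value vectors are hypothetical.
//
//   std::vector<double> values    = {0.3, -0.7, 1.2};
//   std::vector<double> gradients (values.size (), 0.0);
//   applyFunctions (begin (values), end (values), Tanh, InvTanh, begin (gradients));
//   // values[i]    == Tanh (original values[i])
//   // gradients[i] == InvTanh (values[i]), i.e. the derivative evaluated at the
//   //                 already transformed value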
// update the gradients
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
            ++itTargetDelta; ++itTargetGradient; ++itGradient;
        }
        ++itSource;
    }
}
// compute the regularization (L1, L2)
template <EnumRegularization Regularization>
inline double computeRegularization (double weight, const double& factorWeightDecay)
{
    return 0; // generic case: no regularization
}

// L1 regularization
template <>
inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
{
    return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
}

// L2 regularization
template <>
inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
{
    return factorWeightDecay * weight;
}
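
// The specializations return the derivative of the regularization penalty with respect
// to a single weight (a sketch of the underlying math, with lambda for factorWeightDecay):
//   L1:  R(w) = lambda * |w|      =>  dR/dw = lambda * sign(w)  -> copysign (factorWeightDecay, weight)
//   L2:  R(w) = lambda * w^2 / 2  =>  dR/dw = lambda * w        -> factorWeightDecay * weight
// These terms are what the regularized update () below adds to each weight gradient.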
// update the gradients, including the regularization term
template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight, double weightDecay)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient)
                             + computeRegularization<Regularization> (*itWeight, weightDecay);
            ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
        }
        ++itSource;
    }
}
#define USELOCALWEIGHTS 1
// steepest gradient descent step (called once per mini-batch)
template <typename Function, typename Weights, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
    size_t numWeights = weights.size ();
    // ... prepare the local-weight, local-gradient and previous-gradient buffers ...

    size_t currentRepetition = 0;
    // ... repeat the following gradient step ...
    {
        // add the (momentum-weighted) previous gradients to the local weights before
        // the fitness function is evaluated
        for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
        {
            (*itLocWeight) += (*itPrevG);
        }

        // ... evaluate the fitness function -> local gradients ...

        // combine the new gradients with the previous ones and track the largest
        double maxGrad = 0.0;
        for (; itG != itGEnd; ++itG, ++itPrevG)
        {
            double currGrad = (*itG);
            double prevGrad = (*itPrevG);
            // ...
            currGrad += prevGrad;
            (*itPrevG) = currGrad;
            // ...
        }

        // ... if the gradients grew too large, reduce the learning rate and rescale the weights ...
        {
            std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
            std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
                           {
                               // ...
                           });
        }
        // ... otherwise apply the combined gradients to the weights:
        auto itW = std::begin (weights);
        // ...
    }
    // ...
}
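
// Usage sketch: the minimizer is invoked through operator() with the fitness function
// (the Net), the weight vector and the pass-through tuple (settings, batch, drop-out
// container). "net", "weights" and "settingsAndBatch" are a caller's variables, and the
// constructor arguments shown (learning rate, momentum, repetitions) are assumed typical
// values, not taken from this file.
//
//   Steepest minimizer (/*learning rate*/ 1e-4, /*momentum*/ 0.5, /*repetitions*/ 10);
//   double error = minimizer (net, weights, settingsAndBatch);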
// sum-of-squares error function
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd,
                     ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                     ItDelta itDelta, ItDelta itDeltaEnd,
                     InvFnc invFnc, double patternWeight)
{
    double errorSum = 0.0;

    // output - truth
    ItTruth itTruth = itTruthBegin;
    bool hasDeltas = (itDelta != itDeltaEnd);
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        double output = (*itOutput);
        double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error * error * patternWeight;
    }

    return errorSum;
}
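
// Worked sketch: for output y, truth t and pattern weight w the function accumulates
//   errorSum += (y - t)^2 * w
// and, when a delta range is provided, fills the back-propagation deltas with
//   delta = invFnc(y) * (y - t) * w
// A hypothetical call on plain vectors (InvLinear from NeuralNet.h as inverse activation):
//
//   std::vector<double> out   = {0.8, 0.1};
//   std::vector<double> truth = {1.0, 0.0};
//   std::vector<double> delta (out.size (), 0.0);
//   double e = sumOfSquares (begin (out), end (out), begin (truth), end (truth),
//                            begin (delta), end (delta), InvLinear, 1.0);
//   // e == (0.8-1.0)^2 + (0.1-0.0)^2 == 0.05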
// cross-entropy error function
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd,
                     ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                     ItDelta itDelta, ItDelta itDeltaEnd,
                     ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;
        truth = truth < 0.5 ? 0.1 : 0.9;
        if (hasDeltas)
        {
            double delta = probability - truth;
            (*itDelta) = delta * patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0)      // protection against log(0)
        {
            // ...
        }
        else if (probability == 1) // protection against log(0) in the (1-p) term
        {
            // ...
        }
        else
            error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability));
        errorSum += error * patternWeight;
    }

    return errorSum;
}
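
// The accumulated term is the standard binary cross entropy
//   E = - ( t*log(p) + (1-t)*log(1-p) ) * patternWeight
// with the truth value clamped to 0.1 / 0.9 above, and
//   delta = (p - t) * patternWeight
// as the error signal handed to back-propagation.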
// soft-max cross-entropy error function (for mutually exclusive classes)
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd,
                            ItTruth itTruthBegin, ItTruth /*itTruthEnd*/,
                            ItDelta itDelta, ItDelta itDeltaEnd,
                            ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);
    // output - truth
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        double error (0);
        error += truth * log (probability);
        errorSum += error;
    }

    return -errorSum * patternWeight;
}
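
// For mutually exclusive classes the accumulated term is the softmax cross entropy
//   E = - sum_k t_k * log(p_k)
// (scaled by patternWeight), and delta_k = p_k - t_k, which is the standard gradient
// of cross entropy combined with a softmax output layer.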
// compute the weight decay for regularization (L1 or L2)
template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd,
                    double factorWeightDecay, EnumRegularization eRegularization)
{
    if (eRegularization == EnumRegularization::L1)
    {
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += std::fabs (weight);
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else if (eRegularization == EnumRegularization::L2)
    {
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += weight * weight;
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else
        return error;
}
// apply the weights in the forward direction of the DNN
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                            currLayerData.weightsBegin (),
                            currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                            prevLayerData.dropOut ());
    }
    else
    {
        applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                             currLayerData.weightsBegin (),
                             currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                             /* ... unused drop-out iterator ... */);
    }
}
// backward application of the weights (back-propagation of the error)
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                     currLayerData.weightsBegin (),
                                     prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                     prevLayerData.dropOut ());
    }
    else
    {
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      /* ... unused drop-out iterator ... */);
    }
}
// update the weight gradients of one layer, dispatching on the requested regularization
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData,
             double factorWeightDecay, EnumRegularization regularization)
{
    if (factorWeightDecay != 0.0) // regularization is turned on
    {
        if (regularization == EnumRegularization::L1)
        {
            update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else if (regularization == EnumRegularization::L2)
        {
            update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else
        {
            update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                    currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                    currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
        }
    }
    else
    {
        update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
    }
}
// set the drop-out configuration: scale the weights according to the drop-out
// probabilities (or their inverse)
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;
    ++itDrop;

    for (auto& layer : layers ())
    {
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        // ... combine p and pPrev (and invert if requested) into the scale factor ...

        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;
            // ... scale (*itWeight) and advance itWeight ...
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}
// start the training
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer,
                   Settings& settings)
{
    // ...
    settings.create ("trainErrors", 100, 0, 100, 100, 0, 1);
    settings.create ("testErrors",  100, 0, 100, 100, 0, 1);

    size_t cycleCount = 0;
    size_t testCycleCount = 0;
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;

    DropContainer dropContainer;
    // ...
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;

    // loop over the training cycles until the training has converged
    // ...
    {
        // if "drop out" is turned on, the drop-out configuration is renewed every
        // settings.dropRepetitions () change counts
        size_t dropIndex = 0;
        if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
        {
            // fill the drop-out container anew
            dropContainer.clear ();
            // ... input layer ...
            double dropFraction = 0.0;
            dropFraction = dropFractions.at (dropIndex);
            // ...
            for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
            {
                auto& layer = *itLayer;
                size_t _numNodes = layer.numNodes ();
                // ...
                if (dropFractions.size () > dropIndex)
                    dropFraction = dropFractions.at (dropIndex);
                // ... select the nodes of this layer which are to be dropped ...
            }
            isWeightsForDrop = true;
        }

        // execute training cycle
        trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);

        // ------ check if the test error has to be computed in this cycle ------
        bool hasConverged = false;
        {
            if (isWeightsForDrop)
            {
                // rescale the weights before they are used on the full (non-dropped) net
                // ...
                isWeightsForDrop = false;
            }
            // ...

            // ... if multithreading is enabled, distribute the test patterns over the threads ...
            {
                size_t numThreads = std::thread::hardware_concurrency ();
                size_t patternPerThread = testPattern.size () / numThreads;
                std::vector<Batch> batches;
                auto itPat = testPattern.begin ();
                for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
                {
                    batches.push_back (Batch (itPat, itPat + patternPerThread));
                    itPat += patternPerThread;
                }
                if (itPat != testPattern.end ())
                    batches.push_back (Batch (itPat, testPattern.end ()));

                // execute each of the batch ranges on a different thread
                std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
                for (auto& batch : batches)
                {
                    // ...
                    futures.push_back (
                        std::async (std::launch::async, [&]()
                        {
                            std::vector<double> localOutput;
                            // ...
                            double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
                            return std::make_tuple (testBatchError, localOutput);
                        }));
                }

                auto itBatch = batches.begin ();
                for (auto& f : futures)
                {
                    std::tuple<double,std::vector<double>> result = f.get ();
                    testError += std::get<0>(result) / batches.size ();
                    std::vector<double> output = std::get<1>(result);
                    // ...
                    auto output_iterator = output.begin();
                    for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
                    {
                        for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                        {
                            settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                 (*pattern_it).weight ());
                            // ...
                        }
                        // ...
                    }
                    // ...
                }
            }
            // ... otherwise compute the test error on a single thread ...
            {
                std::vector<double> output;
                // ...
                Batch batch (begin (testPattern), end (testPattern));
                // ... run the net on the whole test batch -> testPatternError and output ...
                auto output_iterator = output.begin();
                for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
                {
                    for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                    {
                        settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                             (*pattern_it).weight ());
                        // ...
                    }
                    // ...
                }
                testError += testPatternError;
            }
            // ...
        }

        // if drop-out is used, rescale the weights again for the next training cycle
        if (!hasConverged && !isWeightsForDrop)
        {
            // ...
            isWeightsForDrop = true;
        }
        // ...
        ++dropOutChangeCount;

        // monitoring: error curves
        static double x = -1.0;
        // ...
        settings.addPoint ("trainErrors", cycleCount, trainError);
        settings.addPoint ("testErrors", cycleCount, testError);
        settings.plot ("trainErrors", "C", 1, kBlue);
        // ...

        // progress output
        if ((int)cycleCount % 10 == 0)
        {
            TString convText = Form ("(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                     /* ... errors, cycle count and convergence counts ... */);
            settings.cycle (progress, convText);
        }
        // ...
        {
            TString convText = Form ("(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
            settings.cycle (progress, convText);
        }
    }
    // ...
}
// executes one training cycle: forms mini-batches from the (shuffled) training
// patterns and lets the minimizer run over each batch
template <typename Iterator, typename Minimizer>
double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                        Iterator itPatternBegin, Iterator itPatternEnd,
                        Settings& settings, DropContainer& dropContainer)
{
    double error = 0.0;
    size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
    size_t numBatches = numPattern / settings.batchSize ();
    size_t numBatches_stored = numBatches;

    std::shuffle (itPatternBegin, itPatternEnd, std::default_random_engine{});
    Iterator itPatternBatchBegin = itPatternBegin;
    Iterator itPatternBatchEnd = itPatternBatchBegin;

    // create the batches
    std::vector<Batch> batches;
    while (numBatches > 0)
    {
        std::advance (itPatternBatchEnd, settings.batchSize ());
        batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
        itPatternBatchBegin = itPatternBatchEnd;
        --numBatches;
    }

    // the remaining patterns form one additional batch
    if (itPatternBatchEnd != itPatternEnd)
        batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));

    // ... if multithreading is enabled, distribute the batches over the threads ...
    {
        size_t numThreads = std::thread::hardware_concurrency ();
        size_t batchesPerThread = batches.size () / numThreads;
        typedef std::vector<Batch>::iterator batch_iterator;
        std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
        batch_iterator itBatchBegin = std::begin (batches);
        batch_iterator itBatchCurrEnd = std::begin (batches);
        batch_iterator itBatchEnd = std::end (batches);
        for (size_t iT = 0; iT < numThreads; ++iT)
        {
            if (iT == numThreads-1)
                itBatchCurrEnd = itBatchEnd;
            else
                std::advance (itBatchCurrEnd, batchesPerThread);
            batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
            itBatchBegin = itBatchCurrEnd;
        }

        // execute each of the batch ranges on a different thread
        std::vector<std::future<double>> futures;
        for (auto& batchRange : batchVec)
        {
            // ...
            futures.push_back (
                std::async (std::launch::async, [&]()
                {
                    double localError = 0.0;
                    for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
                    {
                        Batch& batch = *it;
                        pass_through_type settingsAndBatch (settings, batch, dropContainer);
                        Minimizer minimizerClone (minimizer);
                        localError += minimizerClone ((*this), weights, settingsAndBatch);
                    }
                    return localError;
                }));
        }

        for (auto& f : futures)
            error += f.get ();
    }
    // ... otherwise run the batches sequentially ...
    {
        for (auto& batch : batches)
        {
            std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
            error += minimizer ((*this), weights, settingsAndBatch);
        }
    }

    numBatches_stored = std::max (numBatches_stored, size_t(1));
    error /= numBatches_stored;
    // ...
    return error;
}
// compute the net with the given input and the given weights (forward pass only)
template <typename Weights>
std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
{
    std::vector<LayerData> layerData;
    layerData.reserve (m_layers.size ()+1);
    auto itWeight = begin (weights);
    auto itInputBegin = begin (input);
    auto itInputEnd = end (input);
    layerData.push_back (LayerData (itInputBegin, itInputEnd));
    size_t numNodesPrev = input.size ();

    // prepare one LayerData per layer
    for (auto& layer : m_layers)
    {
        layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                        layer.activationFunction (),
                                        layer.modeOutputValues ()));
        size_t _numWeights = layer.numWeights (numNodesPrev);
        itWeight += _numWeights;
        numNodesPrev = layer.numNodes ();
    }

    // forward propagation through all layers
    forwardPattern (m_layers, layerData);

    // fetch the output of the last layer
    std::vector<double> output;
    fetchOutput (layerData.back (), output);
    return output;
}
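
// Usage sketch: once the net structure is set up and weights are available (e.g. after
// train ()), compute () evaluates a single forward pass. "net", "input" and "weights"
// are hypothetical variables of a caller.
//
//   std::vector<double> input    = { /* one input pattern */ };
//   std::vector<double> weights  = { /* net.numWeights () trained weights */ };
//   std::vector<double> response = net.compute (input, weights);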
// execute computation of the DNN for one mini-batch; no gradients, no output fetching
template <typename Weights, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
{
    std::vector<double> nothing; // empty gradients: no back-propagation is done
    // ...
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (nothing), std::end (nothing),
                                     10000, nothing, false);
    return error;
}

// execute computation of the DNN for one mini-batch and fetch the output
template <typename Weights, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights,
                        ModeOutput eFetch, OutContainer& outputContainer) const
{
    std::vector<double> nothing; // empty gradients: no back-propagation is done
    // ...
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (nothing), std::end (nothing),
                                     10000, outputContainer, true);
    return error;
}

// execute computation of the DNN for one mini-batch, with gradient computation
template <typename Weights, typename Gradients, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
{
    std::vector<double> nothing;
    // ...
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients),
                                     0, nothing, false);
    return error;
}

// execute computation of the DNN for one mini-batch, with gradient computation and output fetching
template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients,
                        ModeOutput eFetch, OutContainer& outputContainer) const
{
    // ...
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch,
                                     std::begin (weights), std::end (weights),
                                     std::begin (gradients), std::end (gradients),
                                     0, outputContainer, true);
    return error;
}
// prepare the layer data (one LayerData per layer and pattern) for the forward/backward passes
template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
                                                           Batch& batch,
                                                           const DropContainer& dropContainer,
                                                           ItWeight itWeightBegin,
                                                           ItWeight itWeightEnd,
                                                           ItGradient itGradientBegin,
                                                           ItGradient itGradientEnd,
                                                           size_t& totalNumWeights) const
{
    // ...
    bool usesDropOut = !dropContainer.empty ();
    LayerData::const_dropout_iterator itDropOut;
    if (usesDropOut)
        itDropOut = std::begin (dropContainer);

    if (_layers.empty ())
        throw std::string ("no layers in this net");

    // ...
    totalNumWeights = 0;
    size_t totalNumNodes = 0;
    std::vector<std::vector<LayerData>> layerPatternData;
    layerPatternData.reserve (_layers.size ()+1);
    ItWeight itWeight = itWeightBegin;
    ItGradient itGradient = itGradientBegin;
    size_t numNodesPrev = inputSize ();
    typename Pattern::const_iterator itInputBegin;
    typename Pattern::const_iterator itInputEnd;

    // --------------------- prepare the layer data for the input layer ----------------------------
    layerPatternData.push_back (std::vector<LayerData>());
    for (const Pattern& _pattern : batch)
    {
        std::vector<LayerData>& layerData = layerPatternData.back ();
        layerData.push_back (LayerData (numNodesPrev));

        itInputBegin = _pattern.beginInput ();
        itInputEnd = _pattern.endInput ();
        layerData.back ().setInput (itInputBegin, itInputEnd);

        if (usesDropOut)
            layerData.back ().setDropOut (itDropOut);
    }
    if (usesDropOut)
        itDropOut += _layers.back ().numNodes ();

    // --------------------- prepare the layer data for the following layers -----------------------
    for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
    {
        bool isOutputLayer = (itLayer+1 == itLayerEnd);
        bool isFirstHiddenLayer = (itLayer == begin (_layers));

        auto& layer = *itLayer;
        layerPatternData.push_back (std::vector<LayerData>());
        for (const Pattern& _pattern : batch)
        {
            std::vector<LayerData>& layerData = layerPatternData.back ();
            // ...
            if (itGradientBegin == itGradientEnd) // no gradients are requested
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                                layer.activationFunction (),
                                                layer.modeOutputValues ()));
            }
            else // gradients are needed as well
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
                                                layer.activationFunction (),
                                                layer.inverseActivationFunction (),
                                                layer.modeOutputValues ()));
            }

            if (usesDropOut)
                layerData.back ().setDropOut (itDropOut);
            // ...
        }

        if (usesDropOut)
            itDropOut += layer.numNodes ();

        size_t _numWeights = layer.numWeights (numNodesPrev);
        totalNumWeights += _numWeights;
        itWeight += _numWeights;
        itGradient += _numWeights;
        numNodesPrev = layer.numNodes ();
        totalNumNodes += numNodesPrev;
    }
    assert (totalNumWeights > 0);
    return layerPatternData;
}
// forward propagation of one pattern through all layers
template <typename LayerContainer>
void Net::forwardPattern (const LayerContainer& _layers,
                          std::vector<LayerData>& layerData) const
{
    size_t idxLayer = 0, idxLayerEnd = _layers.size ();
    size_t cumulativeNodeCount = 0;
    for (; idxLayer < idxLayerEnd; ++idxLayer)
    {
        LayerData& prevLayerData = layerData.at (idxLayer);
        LayerData& currLayerData = layerData.at (idxLayer+1);

        forward (prevLayerData, currLayerData);
        // ... apply the layer's activation function to the freshly computed values ...
    }
}
// forward propagation of a whole batch of patterns through all layers
template <typename LayerContainer, typename LayerPatternContainer>
void Net::forwardBatch (const LayerContainer& _layers,
                        LayerPatternContainer& layerPatternData,
                        std::vector<double>& valuesMean,
                        std::vector<double>& valuesStdDev,
                        size_t trainFromLayer) const
{
    valuesMean.clear ();
    valuesStdDev.clear ();

    size_t cumulativeNodeCount = 0;
    for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
    {
        bool doTraining = idxLayer >= trainFromLayer;

        // layer data (pattern-wise) of the current and the next layer
        std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
        std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

        size_t numPattern = prevLayerPatternData.size ();
        size_t numNodesLayer = _layers.at (idxLayer).numNodes ();

        std::vector<MeanVariance> means (numNodesLayer);

        // propagate the pattern data forward through the weights
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            forward (prevLayerData, currLayerData);
        }

        // apply the activation functions
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);
            // ... apply the activation function (and store the gradients when training) ...
        }

        cumulativeNodeCount += numNodesLayer;
    }
}
// fetch the output of the given (last) layer for one pattern
template <typename OutputContainer>
void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
{
    ModeOutputValues eModeOutput = lastLayerData.outputMode ();
    if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
    {
        outputContainer.insert (outputContainer.end (),
                                lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
    }
    else // sigmoid or softmax output: fetch the computed probabilities instead
    {
        const auto prob = lastLayerData.probabilities ();
        outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ());
    }
}


// fetch the output for a whole batch of patterns
template <typename OutputContainer>
void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
{
    for (const LayerData& lastLayerData : lastLayerPatternData)
        fetchOutput (lastLayerData, outputContainer);
}
// compute the error of the last layer over all patterns of the batch;
// returns the tuple (sum of errors, sum of pattern weights)
template <typename ItWeight>
std::tuple<double,double> Net::computeError (const Settings& settings,
                                             std::vector<LayerData>& lastLayerData,
                                             Batch& batch,
                                             ItWeight itWeightBegin,
                                             ItWeight itWeightEnd) const
{
    typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();

    typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
    typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();

    double sumWeights (0.0);
    double sumError (0.0);

    size_t idxPattern = 0;
    for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
    {
        // ...
        const Pattern& _pattern = (*itPattern);
        // compute the error of this pattern with errorFunction (..., truth,
        //                                                       itWeightBegin, itWeightEnd,
        //                                                       pattern weight, weight decay, regularization)
        // and accumulate it (and the pattern weight) into sumError / sumWeights
        // ...
    }

    return std::make_tuple (sumError, sumWeights);
}
// back propagation of the error through all layers for which training is requested
template <typename Settings>
void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                         const Settings& settings,
                         size_t trainFromLayer,
                         size_t totalNumWeights) const
{
    bool doTraining = layerPatternData.size () > trainFromLayer;
    if (!doTraining)
        return;

    // ------------- back propagation -------------
    size_t idxLayer = layerPatternData.size ();
    for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
         itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
    {
        --idxLayer;
        if (idxLayer <= trainFromLayer) // no training requested below this layer
            break;

        std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
        std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

        size_t idxPattern = 0;
        for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                 itPrevLayerData = begin (prevLayerDataColl);
             itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
        {
            LayerData& currLayerData = (*itCurrLayerData);
            LayerData& prevLayerData = *(itPrevLayerData);

            backward (prevLayerData, currLayerData);
            // ... update the weight gradients (with the settings' weight decay and regularization) ...
        }
    }
}
// main NN computation function: forward pass, (optional) output fetching,
// error computation and back propagation for one mini-batch
template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                              ItWeight itWeightBegin, ItWeight itWeightEnd,
                              ItGradient itGradientBegin, ItGradient itGradientEnd,
                              size_t trainFromLayer,
                              OutContainer& outputContainer, bool doFetchOutput) const
{
    Settings& settings = std::get<0>(settingsAndBatch);
    Batch& batch = std::get<1>(settingsAndBatch);
    DropContainer& dropContainer = std::get<2>(settingsAndBatch);

    double sumError = 0.0;
    double sumWeights = 0.0;

    // ----------------------------- prepare the layer data -------------------------------------
    size_t totalNumWeights (0);
    std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                             batch,
                                                                             dropContainer,
                                                                             itWeightBegin,
                                                                             itWeightEnd,
                                                                             itGradientBegin,
                                                                             itGradientEnd,
                                                                             totalNumWeights);

    // ----------------------------- forward propagation ----------------------------------------
    std::vector<double> valuesMean;
    std::vector<double> valuesStdDev;
    forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);

    // ------------- fetch output ------------------
    if (doFetchOutput)
    {
        fetchOutput (layerPatternData.back (), outputContainer);
    }

    // ------------- error computation -------------
    std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch,
                                                    itWeightBegin, itWeightBegin + totalNumWeights);

    // ------------- back propagation --------------
    backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);

    // normalize the gradients by the batch size
    double batchSize = std::distance (std::begin (batch), std::end (batch));
    for (auto it = itGradientBegin; it != itGradientEnd; ++it)
        (*it) /= batchSize;

    sumError /= sumWeights;
    return sumError;
}
// initialize the weights with the given strategy
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
{
    if (eInitStrategy == WeightInitializationStrategy::XAVIER)
    {
        // Gaussian initialization with a spread of sqrt(2/nIn)
        size_t numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double stdDev = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ... draw the weight with this spread and advance itWeight ...
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
    {
        // uniform initialization in [-sqrt(2/nIn), +sqrt(2/nIn)]
        size_t numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double minVal = -sqrt (2.0/nIn);
            double maxVal =  sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ... draw the weight uniformly in [minVal, maxVal] and advance itWeight ...
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::TEST)
    {
        // test initialization (fixed small spread)
        size_t numInput = inputSize ();
        for (auto& layer: layers ())
        {
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
    {
        // initialization scaled with the layer size
        size_t numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                // ...
            }
            numInput = layer.numNodes ();
        }
        return;
    }
}
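
// Note on the spread used above: sqrt(2/nIn), with nIn the fan-in of the layer, is the
// "He"-style scaling commonly recommended for ReLU-like activations; it keeps the
// variance of each node's pre-activation roughly constant from layer to layer.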
// computes the error of the DNN for the given truth values, using the error function
// selected by m_eErrorFunction, and adds the weight-decay (regularization) term
template <typename Container, typename ItWeight>
double Net::errorFunction (LayerData& layerData,
                           Container truth,
                           ItWeight itWeight,
                           ItWeight itWeightEnd,
                           double patternWeight,
                           double factorWeightDecay,
                           EnumRegularization eRegularization) const
{
    double error (0);
    switch (m_eErrorFunction)
    {
    // ... case for the sum-of-squares error function ...
    case ModeErrorFunction::CROSSENTROPY:
    {
        std::vector<double> probabilities = layerData.probabilities ();
        error = crossEntropy (begin (probabilities), end (probabilities),
                              begin (truth), end (truth),
                              /* ... delta range, inverse activation function, ... */
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
    {
        std::cout << "softmax." << std::endl;
        std::vector<double> probabilities = layerData.probabilities ();
        error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                     begin (truth), end (truth),
                                     /* ... delta range, inverse activation function, ... */
                                     patternWeight);
        break;
    }
    }
    // add the weight decay (L1/L2 regularization) if it is configured
    error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
    return error;
}