NeuralNet.icc
1 #ifndef TMVA_NEURAL_NET_I
2 #define TMVA_NEURAL_NET_I
3 #pragma once
4 #pragma GCC diagnostic ignored "-Wunused-variable"
5 
6 #include <tuple>
7 #include <future>
8 
9 #include "Math/Util.h"
10 
11 #include "MethodBase.h"
12 
13 namespace TMVA
14 {
15  namespace DNN
16  {
17 
18 
19 
20 
21 
22 
23 
24 
25  template <typename T>
26  T uniformFromTo (T from, T to)
27  {
28  return from + (rand ()* (to - from)/RAND_MAX);
29  }
30 
31 
32 
33  template <typename Container, typename T>
34  void uniformDouble (Container& container, T maxValue)
35  {
36  for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
37  {
38 // (*it) = uniformFromTo (-1.0*maxValue, 1.0*maxValue);
39  (*it) = TMVA::DNN::uniformFromTo (-1.0*maxValue, 1.0*maxValue);
40  }
41  }
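/* A minimal usage sketch (illustrative; assumes a plain std::vector container):
 * \code
 *   std::vector<double> weights (10);
 *   uniformDouble (weights, 0.1);   // every entry now lies in [-0.1, +0.1]
 * \endcode
 */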
42 
43 
44  static std::shared_ptr<std::function<double(double)>> ZeroFnc = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 0; });
45 
46 
47  static std::shared_ptr<std::function<double(double)>> Sigmoid = std::make_shared<std::function<double(double)>> ([](double value){ value = std::max (-100.0, std::min (100.0,value)); return 1.0/(1.0 + std::exp (-value)); });
48  static std::shared_ptr<std::function<double(double)>> InvSigmoid = std::make_shared<std::function<double(double)>> ([](double value){ double s = (*Sigmoid.get ()) (value); return s*(1.0-s); });
49 
50  static std::shared_ptr<std::function<double(double)>> Tanh = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value); });
51  static std::shared_ptr<std::function<double(double)>> InvTanh = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 - std::pow (value, 2.0); });
52 
53  static std::shared_ptr<std::function<double(double)>> Linear = std::make_shared<std::function<double(double)>> ([](double value){ return value; });
54  static std::shared_ptr<std::function<double(double)>> InvLinear = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 1.0; });
55 
56  static std::shared_ptr<std::function<double(double)>> SymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? value-margin : value < -margin ? value+margin : 0; });
57  static std::shared_ptr<std::function<double(double)>> InvSymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? 1.0 : value < -margin ? 1.0 : 0; });
58 
59  static std::shared_ptr<std::function<double(double)>> ReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? value-margin : 0; });
60  static std::shared_ptr<std::function<double(double)>> InvReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? 1.0 : 0; });
61 
62  static std::shared_ptr<std::function<double(double)>> SoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return std::log (1.0+ std::exp (value)); });
63  static std::shared_ptr<std::function<double(double)>> InvSoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 / (1.0 + std::exp (-value)); });
64 
65  static std::shared_ptr<std::function<double(double)>> TanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value-0.3); });
66  static std::shared_ptr<std::function<double(double)>> InvTanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return 0.3 + (1.0 - std::pow (value, 2.0)); });
67 
68  static std::shared_ptr<std::function<double(double)>> SoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return value / (1.0 + fabs (value)); });
69  static std::shared_ptr<std::function<double(double)>> InvSoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return std::pow ((1.0 - fabs (value)),2.0); });
70 
71  static std::shared_ptr<std::function<double(double)>> Gauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return exp (-std::pow(value*s,2.0)); });
72  static std::shared_ptr<std::function<double(double)>> InvGauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return -2.0 * value * s*s * (*Gauss.get ()) (value); });
73 
74  static std::shared_ptr<std::function<double(double)>> GaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return 1.0 - exp (-std::pow(value*s,2.0)); });
75  static std::shared_ptr<std::function<double(double)>> InvGaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return +2.0 * value * s*s * (*GaussComplement.get ()) (value); });
76 
77 
78 
79 /*! \brief apply weights using drop-out; for no drop-out, pass the address of a bool set to true as itDrop, so that *itDrop evaluates to "true"
80  *
81  * itDrop iterates in step with itSourceBegin
82  */
83 template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
84  void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
85  ItWeight itWeight,
86  ItTarget itTargetBegin, ItTarget itTargetEnd,
87  ItDrop itDrop)
88  {
89  for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
90  {
91  for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
92  {
93  if (!HasDropOut || *itDrop)
94  (*itTarget) += (*itSource) * (*itWeight);
95  ++itWeight;
96  }
97  if (HasDropOut) ++itDrop;
98  }
99  }
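/* A minimal forward-propagation sketch (illustrative): two source nodes feed three target
 * nodes; the weights are stored per source node, one block of target weights after the other.
 * \code
 *   std::vector<double> source {1.0, 2.0};
 *   std::vector<double> weights {0.1, 0.2, 0.3,    // source node 0 -> target nodes 0,1,2
 *                                0.4, 0.5, 0.6};   // source node 1 -> target nodes 0,1,2
 *   std::vector<double> target (3, 0.0);
 *   bool noDrop = true;                            // dummy: all nodes active
 *   applyWeights<false> (begin (source), end (source), begin (weights),
 *                        begin (target), end (target), &noDrop);
 *   // target == {0.9, 1.2, 1.5}
 * \endcode
 */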
100 
101 
102 
103 
104 
105 
106 /*! \brief apply weights backwards (for backprop); for no drop-out, pass the address of a bool set to true as itDrop, so that *itDrop evaluates to "true"
107  *
108  * itDrop iterates in step with itPrev (in agreement with "applyWeights", where it iterates with itSource; the source nodes there correspond to the itPrev nodes here)
109  */
110 template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
111  void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
112  ItWeight itWeight,
113  ItPrev itPrevBegin, ItPrev itPrevEnd,
114  ItDrop itDrop)
115  {
116  for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
117  {
118  for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
119  {
120  if (!HasDropOut || *itDrop)
121  (*itPrev) += (*itCurr) * (*itWeight);
122  ++itWeight;
123  }
124  if (HasDropOut) ++itDrop;
125  }
126  }
127 
128 
129 
130 
131 
132 
133 
134 /*! \brief apply the activation functions
135  *
136  *
137  */
138 
139  template <typename ItValue, typename Fnc>
140  void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
141  {
142  while (itValue != itValueEnd)
143  {
144  auto& value = (*itValue);
145  value = (*fnc.get ()) (value);
146 
147  ++itValue;
148  }
149  }
150 
151 
152 /*! \brief apply the activation functions and compute the gradient
153  *
154  *
155  */
156  template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
157  void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
158  {
159  while (itValue != itValueEnd)
160  {
161  auto& value = (*itValue);
162  value = (*fnc.get ()) (value);
163  (*itGradient) = (*invFnc.get ()) (value);
164 
165  ++itValue; ++itGradient;
166  }
167  }
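/* A worked example with the Tanh/InvTanh pair defined above (illustrative):
 * \code
 *   std::vector<double> values {0.5};
 *   std::vector<double> gradients (1, 0.0);
 *   applyFunctions (begin (values), end (values), Tanh, InvTanh, begin (gradients));
 *   // values[0]    == tanh (0.5)          (about 0.462)
 *   // gradients[0] == 1 - tanh (0.5)^2    (about 0.786); the derivative is evaluated on the already-activated value
 * \endcode
 */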
168 
169 
170 
171 /*! \brief update the gradients
172  *
173  *
174  */
175  template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
176  void update (ItSource itSource, ItSource itSourceEnd,
177  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
178  ItTargetGradient itTargetGradientBegin,
179  ItGradient itGradient)
180  {
181  while (itSource != itSourceEnd)
182  {
183  auto itTargetDelta = itTargetDeltaBegin;
184  auto itTargetGradient = itTargetGradientBegin;
185  while (itTargetDelta != itTargetDeltaEnd)
186  {
187  (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
188  ++itTargetDelta; ++itTargetGradient; ++itGradient;
189  }
190  ++itSource;
191  }
192  }
193 
194 
195 
196 
197 /*! \brief compute the regularization (L1, L2)
198  *
199  *
200  */
201  template <EnumRegularization Regularization>
202  inline double computeRegularization (double weight, const double& factorWeightDecay)
203  {
204  MATH_UNUSED(weight);
205  MATH_UNUSED(factorWeightDecay);
206 
207  return 0;
208  }
209 
210 // L1 regularization
211  template <>
212  inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
213  {
214  return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
215  }
216 
217 // L2 regularization
218  template <>
219  inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
220  {
221  return factorWeightDecay * weight;
222  }
223 
224 
225 /*! \brief update the gradients, using regularization
226  *
227  *
228  */
229  template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
230  void update (ItSource itSource, ItSource itSourceEnd,
231  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
232  ItTargetGradient itTargetGradientBegin,
233  ItGradient itGradient,
234  ItWeight itWeight, double weightDecay)
235  {
236  // ! the factor weightDecay has to already be scaled by 1/n, where n is the number of weights
237  while (itSource != itSourceEnd)
238  {
239  auto itTargetDelta = itTargetDeltaBegin;
240  auto itTargetGradient = itTargetGradientBegin;
241  while (itTargetDelta != itTargetDeltaEnd)
242  {
243  (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight,weightDecay);
244  ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
245  }
246  ++itSource;
247  }
248  }
249 
250 
251 
252 
253 
254 
255 #define USELOCALWEIGHTS 1
256 
257 
258 
259 /*! \brief implementation of the steepest gradient descent algorithm
260  *
261  * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle
262  */
263  template <typename Function, typename Weights, typename PassThrough>
264  double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
265  {
266  size_t numWeights = weights.size ();
267  // std::vector<double> gradients (numWeights, 0.0);
268  m_localGradients.assign (numWeights, 0.0);
269  // std::vector<double> localWeights (begin (weights), end (weights));
270  // m_localWeights.reserve (numWeights);
271  m_localWeights.assign (begin (weights), end (weights));
272 
273  double E = 1e10;
274  if (m_prevGradients.size () != numWeights)
275  {
276  m_prevGradients.clear ();
277  m_prevGradients.assign (weights.size (), 0);
278  }
279 
280  bool success = true;
281  size_t currentRepetition = 0;
282  while (success)
283  {
284  if (currentRepetition >= m_repetitions)
285  break;
286 
287  m_localGradients.assign (numWeights, 0.0);
288 
289  // --- nesterov momentum ---
290  // apply momentum before computing the new gradient
291  auto itPrevG = begin (m_prevGradients);
292  auto itPrevGEnd = end (m_prevGradients);
293  auto itLocWeight = begin (m_localWeights);
294  for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
295  {
296  (*itPrevG) *= m_beta;
297  (*itLocWeight) += (*itPrevG);
298  }
299 
300  E = fitnessFunction (passThrough, m_localWeights, m_localGradients);
301 // plotGradients (gradients);
302 // plotWeights (localWeights);
303 
304  double alpha = gaussDouble (m_alpha, m_alpha/2.0);
305 // double alpha = m_alpha;
306 
307  auto itG = begin (m_localGradients);
308  auto itGEnd = end (m_localGradients);
309  itPrevG = begin (m_prevGradients);
310  double maxGrad = 0.0;
311  for (; itG != itGEnd; ++itG, ++itPrevG)
312  {
313  double currGrad = (*itG);
314  double prevGrad = (*itPrevG);
315  currGrad *= alpha;
316 
317  //(*itPrevG) = m_beta * (prevGrad + currGrad);
318  currGrad += prevGrad;
319  (*itG) = currGrad;
320  (*itPrevG) = currGrad;
321 
322  if (std::fabs (currGrad) > maxGrad)
323  maxGrad = currGrad;
324  }
325 
326  if (maxGrad > 1)
327  {
328  m_alpha /= 2;
329  std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
330  std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
331  {
332  w /= maxGrad;
333  });
334  m_prevGradients.clear ();
335  }
336  else
337  {
338  auto itW = std::begin (weights);
339  std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
340  {
341  *itW += g;
342  ++itW;
343  });
344  }
345 
346  ++currentRepetition;
347  }
348  return E;
349  }
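/* Sketch of the contract expected from the fitness function (illustrative): it is called as
 * fitnessFunction (passThrough, weights, gradients) and is expected to fill "gradients" with the
 * descent direction (note the "+=" applied to the weights above). The toy example below assumes
 * that Steepest is default-constructible as declared in NeuralNet.h.
 * \code
 *   struct Quadratic
 *   {
 *       template <typename PassThrough>
 *       double operator() (PassThrough&, std::vector<double>& w, std::vector<double>& g)
 *       {
 *           double e = 0.0;
 *           for (size_t i = 0; i < w.size (); ++i) { e += w[i]*w[i]; g[i] = -2.0*w[i]; }
 *           return e;   // E = sum w_i^2; the descent direction is -dE/dw
 *       }
 *   };
 *
 *   Quadratic fitness;
 *   std::vector<double> w {1.0, -1.0};
 *   int dummyPassThrough = 0;
 *   Steepest minimizer;
 *   double e = minimizer (fitness, w, dummyPassThrough);   // w is pushed towards 0
 * \endcode
 */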
350 
351 
352 
353 
354 
355 
356 
357 
358 
359 
360 
361 
362 
363 
364 
365 
366 
367 
368 
369 
370 /*! \brief sum of squares error function
371  *
372  *
373  */
374  template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
375  double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
376  {
377  double errorSum = 0.0;
378 
379  // output - truth
380  ItTruth itTruth = itTruthBegin;
381  bool hasDeltas = (itDelta != itDeltaEnd);
382  for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
383  {
384 // assert (itTruth != itTruthEnd);
385  double output = (*itOutput);
386  double error = output - (*itTruth);
387  if (hasDeltas)
388  {
389  (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
390  ++itDelta;
391  }
392  errorSum += error*error * patternWeight;
393  }
394 
395  return 0.5*errorSum;
396  }
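/* A worked example (illustrative), using the InvLinear function defined above as the inverse
 * activation and a pattern weight of 1:
 * \code
 *   std::vector<double> output {0.8, 0.2};
 *   std::vector<double> truth  {1.0, 0.0};
 *   std::vector<double> deltas (2, 0.0);
 *   double e = sumOfSquares (begin (output), end (output), begin (truth), end (truth),
 *                            begin (deltas), end (deltas), InvLinear, 1.0);
 *   // e      == 0.5 * (0.04 + 0.04) == 0.04
 *   // deltas == {-0.2, +0.2}
 * \endcode
 */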
397 
398 
399 
400 /*! \brief cross entropy error function
401  *
402  *
403  */
404  template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
405  double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
406  {
407  bool hasDeltas = (itDelta != itDeltaEnd);
408 
409  double errorSum = 0.0;
410  for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
411  {
412  double probability = *itProbability;
413  double truth = *itTruthBegin;
414  /* truth = truth < 0.1 ? 0.1 : truth; */
415  /* truth = truth > 0.9 ? 0.9 : truth; */
416  truth = truth < 0.5 ? 0.1 : 0.9;
417  if (hasDeltas)
418  {
419  double delta = probability - truth;
420  (*itDelta) = delta*patternWeight;
421 // (*itDelta) = (*itInvActFnc)(probability) * delta * patternWeight;
422  ++itDelta;
423  }
424  double error (0);
425  if (probability == 0) // protection against log (0)
426  {
427  if (truth >= 0.5)
428  error += 1.0;
429  }
430  else if (probability == 1)
431  {
432  if (truth < 0.5)
433  error += 1.0;
434  }
435  else
436  error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability)); // cross entropy function
437  errorSum += error * patternWeight;
438 
439  }
440  return errorSum;
441  }
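/* A worked example (illustrative); note that the truth value is internally mapped to 0.1/0.9:
 * \code
 *   std::vector<double> prob   {0.8};
 *   std::vector<double> truth  {1.0};          // mapped to 0.9 inside the function
 *   std::vector<double> deltas (1, 0.0);
 *   double e = crossEntropy (begin (prob), end (prob), begin (truth), end (truth),
 *                            begin (deltas), end (deltas), InvSigmoid, 1.0);
 *   // deltas == {0.8 - 0.9} == {-0.1}
 *   // e      == -(0.9*log (0.8) + 0.1*log (0.2))   (about 0.362)
 * \endcode
 */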
442 
443 
444 
445 
446 /*! \brief soft-max cross-entropy error function (for mutually exclusive classes)
447  *
448  *
449  */
450  template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
451  double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
452  {
453  double errorSum = 0.0;
454 
455  bool hasDeltas = (itDelta != itDeltaEnd);
456  // output - truth
457  ItTruth itTruth = itTruthBegin;
458  for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
459  {
460 // assert (itTruth != itTruthEnd);
461  double probability = (*itProbability);
462  double truth = (*itTruth);
463  if (hasDeltas)
464  {
465  (*itDelta) = probability - truth;
466 // (*itDelta) = (*itInvActFnc)(sm) * delta * patternWeight;
467  ++itDelta; //++itInvActFnc;
468  }
469  double error (0);
470 
471  error += truth * log (probability);
472  errorSum += error;
473  }
474 
475  return -errorSum * patternWeight;
476  }
477 
478 
479 
480 
481 
482 
483 
484 
485 
486 /*! \brief compute the weight decay for regularization (L1 or L2)
487  *
488  *
489  */
490  template <typename ItWeight>
491  double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
492  {
493  if (eRegularization == EnumRegularization::L1)
494  {
495  // weight decay (regularization)
496  double w = 0;
497  size_t n = 0;
498  for (; itWeight != itWeightEnd; ++itWeight, ++n)
499  {
500  double weight = (*itWeight);
501  w += std::fabs (weight);
502  }
503  return error + 0.5 * w * factorWeightDecay / n;
504  }
505  else if (eRegularization == EnumRegularization::L2)
506  {
507  // weight decay (regularization)
508  double w = 0;
509  size_t n = 0;
510  for (; itWeight != itWeightEnd; ++itWeight, ++n)
511  {
512  double weight = (*itWeight);
513  w += weight*weight;
514  }
515  return error + 0.5 * w * factorWeightDecay / n;
516  }
517  else
518  return error;
519  }
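/* A worked L2 example (illustrative): two weights, factorWeightDecay = 0.01:
 * \code
 *   std::vector<double> w {0.5, -0.5};
 *   double e = weightDecay (1.0, begin (w), end (w), 0.01, EnumRegularization::L2);
 *   // e == 1.0 + 0.5 * (0.25 + 0.25) * 0.01 / 2 == 1.00125
 * \endcode
 */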
520 
521 
522 
523 
524 
525 
526 
527 
528 
529 
530 
531 
532 
533 
534 /*! \brief apply the weights (and functions) in forward direction of the DNN
535  *
536  *
537  */
538  template <typename LAYERDATA>
539  void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
540  {
541  if (prevLayerData.hasDropOut ())
542  {
543  applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
544  currLayerData.weightsBegin (),
545  currLayerData.valuesBegin (), currLayerData.valuesEnd (),
546  prevLayerData.dropOut ());
547  }
548  else
549  {
550  bool dummy = true;
551  applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
552  currLayerData.weightsBegin (),
553  currLayerData.valuesBegin (), currLayerData.valuesEnd (),
554  &dummy); // dummy to turn on all nodes (no drop out)
555  }
556  }
557 
558 
559 
560 /*! \brief backward application of the weights (back-propagation of the error)
561  *
562  *
563  */
564 template <typename LAYERDATA>
565  void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
566 {
567  if (prevLayerData.hasDropOut ())
568  {
569  applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
570  currLayerData.weightsBegin (),
571  prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
572  prevLayerData.dropOut ());
573  }
574  else
575  {
576  bool dummy = true;
577  applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
578  currLayerData.weightsBegin (),
579  prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
580  &dummy); // dummy to use all nodes (no drop out)
581  }
582 }
583 
584 
585 
586 
587 
588 /*! \brief update the gradients of the weights connecting the two layers
589  *
590  *
591  */
592  template <typename LAYERDATA>
593  void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
594  {
595  // ! the "factorWeightDecay" has to already be scaled by 1/n, where n is the number of weights
596  if (factorWeightDecay != 0.0) // has weight regularization
597  if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) )
598  {
599  update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
600  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
601  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
602  currLayerData.weightsBegin (), factorWeightDecay);
603  }
604  else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) )
605  {
606  update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
607  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
608  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
609  currLayerData.weightsBegin (), factorWeightDecay);
610  }
611  else
612  {
613  update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
614  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
615  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
616  }
617 
618  else
619  { // no weight regularization
620  update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
621  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
622  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
623  }
624  }
625 
626 
627 
628 
629 
630 
631 
632 
633 
634 
635 
636 
637 /*! \brief compute the drop-out weight factor
638  *
639  * When using drop-out, a fraction of the nodes is turned off in each computation cycle.
640  * Once all nodes are turned on again (for instance when the test samples are evaluated),
641  * the weights have to be adjusted to account for the different number of active nodes.
642  * This function computes that factor and applies it to the weights.
643  */
644  template <typename WeightsType, typename DropProbabilities>
645  void Net::dropOutWeightFactor (WeightsType& weights,
646  const DropProbabilities& drops,
647  bool inverse)
648  {
649  if (drops.empty () || weights.empty ())
650  return;
651 
652  auto itWeight = std::begin (weights);
653  auto itWeightEnd = std::end (weights);
654  auto itDrop = std::begin (drops);
655  auto itDropEnd = std::end (drops);
656  size_t numNodesPrev = inputSize ();
657  double dropFractionPrev = *itDrop;
658  ++itDrop;
659 
660  for (auto& layer : layers ())
661  {
662  if (itDrop == itDropEnd)
663  break;
664 
665  size_t _numNodes = layer.numNodes ();
666 
667  double dropFraction = *itDrop;
668  double pPrev = 1.0 - dropFractionPrev;
669  double p = 1.0 - dropFraction;
670  p *= pPrev;
671 
672  if (inverse)
673  {
674  p = 1.0/p;
675  }
676  size_t _numWeights = layer.numWeights (numNodesPrev);
677  for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
678  {
679  if (itWeight == itWeightEnd)
680  break;
681 
682  *itWeight *= p;
683  ++itWeight;
684  }
685  numNodesPrev = _numNodes;
686  dropFractionPrev = dropFraction;
687  ++itDrop;
688  }
689  }
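/* Worked example (illustrative): if the input layer has a drop fraction of 0.2 and the first
 * hidden layer a drop fraction of 0.5, the weights between them are scaled by
 * p = (1-0.2)*(1-0.5) = 0.4 when switching to the all-nodes-active configuration, and by
 * 1/0.4 = 2.5 when inverse == true (i.e. when going back to the dropped configuration).
 */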
690 
691 
692 
693 
694 
695 
696 /*! \brief execute the training until convergence is reached
697  *
698  * \param weights the container with the weights (synapses)
699  * \param trainPattern the patterns used for training
700  * \param testPattern the patterns used for testing
701  * \param minimizer the minimizer (e.g. steepest gradient descent) to be used
702  * \param settings the settings for the training (e.g. multithreading or not, regularization, etc.)
703  */
704  template <typename Minimizer>
705  double Net::train (std::vector<double>& weights,
706  std::vector<Pattern>& trainPattern,
707  const std::vector<Pattern>& testPattern,
708  Minimizer& minimizer,
709  Settings& settings)
710  {
711 // std::cout << "START TRAINING" << std::endl;
712  settings.startTrainCycle ();
713 
714  // JsMVA progress bar maximum (100%)
715  if (fIPyMaxIter) *fIPyMaxIter = 100;
716 
717  settings.pads (4);
718  settings.create ("trainErrors", 100, 0, 100, 100, 0,1);
719  settings.create ("testErrors", 100, 0, 100, 100, 0,1);
720 
721  size_t cycleCount = 0;
722  size_t testCycleCount = 0;
723  double testError = 1e20;
724  double trainError = 1e20;
725  size_t dropOutChangeCount = 0;
726 
727  DropContainer dropContainer;
728  DropContainer dropContainerTest;
729  const std::vector<double>& dropFractions = settings.dropFractions ();
730  bool isWeightsForDrop = false;
731 
732 
733  // until convergence
734  do
735  {
736  ++cycleCount;
737 
738  // if dropOut enabled
739  size_t dropIndex = 0;
740  if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
741  {
742  // fill the dropOut-container
743  dropContainer.clear ();
744  size_t _numNodes = inputSize ();
745  double dropFraction = 0.0;
746  dropFraction = dropFractions.at (dropIndex);
747  ++dropIndex;
748  fillDropContainer (dropContainer, dropFraction, _numNodes);
749  for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
750  {
751  auto& layer = *itLayer;
752  _numNodes = layer.numNodes ();
753  // how many nodes have to be dropped
754  dropFraction = 0.0;
755  if (dropFractions.size () > dropIndex)
756  dropFraction = dropFractions.at (dropIndex);
757 
758  fillDropContainer (dropContainer, dropFraction, _numNodes);
759  }
760  isWeightsForDrop = true;
761  }
762 
763  // execute training cycle
764  trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);
765 
766 
767  // ------ check if we have to execute a test ------------------
768  bool hasConverged = false;
769  if (testCycleCount % settings.testRepetitions () == 0) // we test only every "testRepetitions"-th repetition
770  {
771  if (isWeightsForDrop)
772  {
773  dropOutWeightFactor (weights, dropFractions);
774  isWeightsForDrop = false;
775  }
776 
777 
778  testError = 0;
779  //double weightSum = 0;
780  settings.startTestCycle ();
781  if (settings.useMultithreading ())
782  {
783  size_t numThreads = std::thread::hardware_concurrency ();
784  size_t patternPerThread = testPattern.size () / numThreads;
785  std::vector<Batch> batches;
786  auto itPat = testPattern.begin ();
787  // auto itPatEnd = testPattern.end ();
788  for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
789  {
790  batches.push_back (Batch (itPat, itPat + patternPerThread));
791  itPat += patternPerThread;
792  }
793  if (itPat != testPattern.end ())
794  batches.push_back (Batch (itPat, testPattern.end ()));
795 
796  std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
797  for (auto& batch : batches)
798  {
799  // -------------------- execute each of the batch ranges on a different thread -------------------------------
800  futures.push_back (
801  std::async (std::launch::async, [&]()
802  {
803  std::vector<double> localOutput;
804  pass_through_type passThrough (settings, batch, dropContainerTest);
805  double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
806  return std::make_tuple (testBatchError, localOutput);
807  })
808  );
809  }
810 
811  auto itBatch = batches.begin ();
812  for (auto& f : futures)
813  {
814  std::tuple<double,std::vector<double>> result = f.get ();
815  testError += std::get<0>(result) / batches.size ();
816  std::vector<double> output = std::get<1>(result);
817 
818  //if (output.size () == testPattern.size ())
819  {
820  //auto it = begin (testPattern);
821  auto it = (*itBatch).begin ();
822  for (double out : output)
823  {
824  settings.testSample (0, out, (*it).output ().at (0), (*it).weight ());
825  ++it;
826  }
827  }
828  ++itBatch;
829  }
830 
831  }
832  else
833  {
834  std::vector<double> output;
835  //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it)
836  {
837  //const Pattern& p = (*it);
838  //double weight = p.weight ();
839  //Batch batch (it, it+1);
840  Batch batch (begin (testPattern), end (testPattern));
841  output.clear ();
842  pass_through_type passThrough (settings, batch, dropContainerTest);
843  double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);
844 
845  auto it = batch.begin ();
846  for (double out : output)
847  {
848  settings.testSample (0, out, (*it).output ().at (0), (*it).weight ());
849  ++it;
850  }
851  //weightSum += fabs (weight);
852  //testError += testPatternError*weight;
853  testError += testPatternError; /// batch.size ();
854  }
855  // testError /= testPattern.size ();
856  }
857  settings.endTestCycle ();
858 // testError /= weightSum;
859 
860  settings.computeResult (*this, weights);
861 
862  hasConverged = settings.hasConverged (testError);
863  if (!hasConverged && !isWeightsForDrop)
864  {
865  dropOutWeightFactor (weights, dropFractions, true); // inverse
866  isWeightsForDrop = true;
867  }
868  }
869  ++testCycleCount;
870  ++dropOutChangeCount;
871 
872 
873  static double x = -1.0;
874  x += 1.0;
875 // settings.resetPlot ("errors");
876  settings.addPoint ("trainErrors", cycleCount, trainError);
877  settings.addPoint ("testErrors", cycleCount, testError);
878  settings.plot ("trainErrors", "C", 1, kBlue);
879  settings.plot ("testErrors", "C", 1, kMagenta);
880 
881 
882  // setup error plots and progress bar variables for JsMVA
883  if (fInteractive){
884  fInteractive->AddPoint(cycleCount, trainError, testError);
885  if (*fExitFromTraining) break;
886  *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
887  }
888 
889  if (hasConverged)
890  break;
891 
892  if ((int)cycleCount % 10 == 0) {
893 
894  TString convText = Form( "(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
895  trainError,
896  testError,
897  (int)cycleCount,
898  (int)settings.convergenceCount (),
899  (int)settings.maxConvergenceCount ());
900  double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
901  settings.cycle (progress, convText);
902  }
903  }
904  while (true);
905  settings.endTrainCycle (trainError);
906 
907  TString convText = Form( "(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
908  double progress = 100*(double)settings.maxConvergenceCount() /(double)settings.convergenceSteps ();
909  settings.cycle (progress, convText);
910 
911  return testError;
912  }
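/* A condensed usage sketch (illustrative; "net", "settings", "trainPattern" and "testPattern"
 * are assumed to be set up elsewhere, and <iterator> to be available for std::back_inserter):
 * \code
 *   std::vector<double> weights;
 *   net.initializeWeights (WeightInitializationStrategy::XAVIER, std::back_inserter (weights));
 *   Steepest minimizer;   // the steepest-gradient-descent minimizer defined in this file
 *   double testError = net.train (weights, trainPattern, testPattern, minimizer, settings);
 * \endcode
 */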
913 
914 
915 
916 /*! \brief execute a single training cycle
917  *
918  * uses multithreading if turned on
919  *
920  * \param minimizer the minimizer to be used (e.g. SGD)
921  * \param weights the weight container with all the synapse weights
922  * \param itPatternBegin begin of the pattern container
923  * \param itPatternEnd the end of the pattern container
924  * \param settings the settings for this training (e.g. multithreading or not, regularization, etc.)
925  * \param dropContainer the data for dropping-out nodes (regularization technique)
926  */
927  template <typename Iterator, typename Minimizer>
928  inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
929  Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
930  {
931  double error = 0.0;
932  size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
933  size_t numBatches = numPattern/settings.batchSize ();
934  size_t numBatches_stored = numBatches;
935 
936  std::random_shuffle (itPatternBegin, itPatternEnd);
937  Iterator itPatternBatchBegin = itPatternBegin;
938  Iterator itPatternBatchEnd = itPatternBatchBegin;
939 
940  // create batches
941  std::vector<Batch> batches;
942  while (numBatches > 0)
943  {
944  std::advance (itPatternBatchEnd, settings.batchSize ());
945  batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
946  itPatternBatchBegin = itPatternBatchEnd;
947  --numBatches;
948  }
949 
950  // add the last pattern to the last batch
951  if (itPatternBatchEnd != itPatternEnd)
952  batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));
953 
954 
955  // use multithreading if requested
956  if (settings.useMultithreading ())
957  {
958  // -------------------- divide the batches into bunches for each thread --------------
959  size_t numThreads = std::thread::hardware_concurrency ();
960  size_t batchesPerThread = batches.size () / numThreads;
961  typedef std::vector<Batch>::iterator batch_iterator;
962  std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
963  batch_iterator itBatchBegin = std::begin (batches);
964  batch_iterator itBatchCurrEnd = std::begin (batches);
965  batch_iterator itBatchEnd = std::end (batches);
966  for (size_t iT = 0; iT < numThreads; ++iT)
967  {
968  if (iT == numThreads-1)
969  itBatchCurrEnd = itBatchEnd;
970  else
971  std::advance (itBatchCurrEnd, batchesPerThread);
972  batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
973  itBatchBegin = itBatchCurrEnd;
974  }
975 
976  // -------------------- loop over batches -------------------------------------------
977  std::vector<std::future<double>> futures;
978  for (auto& batchRange : batchVec)
979  {
980  // -------------------- execute each of the batch ranges on a different thread -------------------------------
981  futures.push_back (
982  std::async (std::launch::async, [&]()
983  {
984  double localError = 0.0;
985  for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
986  {
987  Batch& batch = *it;
988  pass_through_type settingsAndBatch (settings, batch, dropContainer);
989  Minimizer minimizerClone (minimizer);
990  localError += minimizerClone ((*this), weights, settingsAndBatch); /// call the minimizer
991  }
992  return localError;
993  })
994  );
995  }
996 
997  for (auto& f : futures)
998  error += f.get ();
999  }
1000  else
1001  {
1002  for (auto& batch : batches)
1003  {
1004  std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
1005  error += minimizer ((*this), weights, settingsAndBatch);
1006  }
1007  }
1008 
1009  numBatches_stored = std::max (numBatches_stored, size_t(1)); /// normalize the error
1010  error /= numBatches_stored;
1011  settings.testIteration ();
1012 
1013  return error;
1014  }
1015 
1016 
1017 
1018 
1019 
1020 /*! \brief compute the neural net
1021  *
1022  * \param input the input data
1023  * \param weights the weight data
1024  */
1025  template <typename Weights>
1026  std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
1027  {
1028  std::vector<LayerData> layerData;
1029  layerData.reserve (m_layers.size ()+1);
1030  auto itWeight = begin (weights);
1031  auto itInputBegin = begin (input);
1032  auto itInputEnd = end (input);
1033  layerData.push_back (LayerData (itInputBegin, itInputEnd));
1034  size_t numNodesPrev = input.size ();
1035 
1036  // -------------------- prepare layer data with one pattern -------------------------------
1037  for (auto& layer: m_layers)
1038  {
1039  layerData.push_back (LayerData (layer.numNodes (), itWeight,
1040  layer.activationFunction (),
1041  layer.modeOutputValues ()));
1042  size_t _numWeights = layer.numWeights (numNodesPrev);
1043  itWeight += _numWeights;
1044  numNodesPrev = layer.numNodes ();
1045  }
1046 
1047 
1048  // --------- forward -------------
1049  forwardPattern (m_layers, layerData);
1050 
1051  // ------------- fetch output ------------------
1052  std::vector<double> output;
1053  fetchOutput (layerData.back (), output);
1054  return output;
1055  }
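/* Usage sketch (illustrative; "net" and "weights" are assumed to be set up and trained elsewhere):
 * \code
 *   std::vector<double> input {0.3, -1.2, 0.7};               // one input pattern
 *   std::vector<double> output = net.compute (input, weights);
 *   // the content of "output" depends on the output mode of the last layer (direct, sigmoid, softmax)
 * \endcode
 */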
1056 
1057 
1058  template <typename Weights, typename PassThrough>
1059  double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
1060  {
1061  std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1062  assert (numWeights () == weights.size ());
1063  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
1064  return error;
1065  }
1066 
1067  template <typename Weights, typename PassThrough, typename OutContainer>
1068  double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
1069  {
1070  std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1071  assert (numWeights () == weights.size ());
1072  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
1073  return error;
1074  }
1075 
1076 
1077  template <typename Weights, typename Gradients, typename PassThrough>
1078  double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
1079  {
1080  std::vector<double> nothing;
1081  assert (numWeights () == weights.size ());
1082  assert (weights.size () == gradients.size ());
1083  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
1084  return error;
1085  }
1086 
1087  template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1088  double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
1089  {
1090  MATH_UNUSED(eFetch);
1091  assert (numWeights () == weights.size ());
1092  assert (weights.size () == gradients.size ());
1093  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
1094  return error;
1095  }
1096 
1097 
1098 
1099  template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1100  std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
1101  Batch& batch,
1102  const DropContainer& dropContainer,
1103  ItWeight itWeightBegin,
1104  ItWeight /*itWeightEnd*/,
1105  ItGradient itGradientBegin,
1106  ItGradient itGradientEnd,
1107  size_t& totalNumWeights) const
1108  {
1109  typename DropContainer::const_iterator itDropOut;
1110  bool usesDropOut = !dropContainer.empty ();
1111  if (usesDropOut)
1112  itDropOut = std::begin (dropContainer);
1113 
1114  if (_layers.empty ())
1115  throw std::string ("no layers in this net");
1116 
1117 
1118  // ----------- create layer data -------------------------------------------------------
1119  //LM- This assert not needed anymore (outputsize is actually numNodes+1)
1120  //assert (_layers.back ().numNodes () == outputSize ());
1121  totalNumWeights = 0;
1122  size_t totalNumNodes = 0;
1123  std::vector<std::vector<LayerData>> layerPatternData;
1124  layerPatternData.reserve (_layers.size ()+1);
1125  ItWeight itWeight = itWeightBegin;
1126  ItGradient itGradient = itGradientBegin;
1127  size_t numNodesPrev = inputSize ();
1128  typename Pattern::const_iterator itInputBegin;
1129  typename Pattern::const_iterator itInputEnd;
1130 
1131  // ItWeight itGammaBegin = itWeightBegin + numWeights ();
1132  // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes ();
1133  // ItGradient itGradGammaBegin = itGradientBegin + numWeights ();
1134  // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes ();
1135 
1136 
1137  // --------------------- prepare layer data for input layer ----------------------------
1138  layerPatternData.push_back (std::vector<LayerData>());
1139  for (const Pattern& _pattern : batch)
1140  {
1141  std::vector<LayerData>& layerData = layerPatternData.back ();
1142  layerData.push_back (LayerData (numNodesPrev));
1143 
1144  itInputBegin = _pattern.beginInput ();
1145  itInputEnd = _pattern.endInput ();
1146  layerData.back ().setInput (itInputBegin, itInputEnd);
1147 
1148  if (usesDropOut)
1149  layerData.back ().setDropOut (itDropOut);
1150 
1151  }
1152 
1153 
1154  if (usesDropOut)
1155  itDropOut += _layers.back ().numNodes ();
1156 
1157  // ---------------- prepare subsequent layers ---------------------------------------------
1158  // for each of the layers
1159  for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
1160  {
1161  bool isOutputLayer = (itLayer+1 == itLayerEnd);
1162  bool isFirstHiddenLayer = (itLayer == begin (_layers));
1163 
1164  auto& layer = *itLayer;
1165  layerPatternData.push_back (std::vector<LayerData>());
1166  // for each pattern, prepare a layerData
1167  for (const Pattern& _pattern : batch)
1168  {
1169  std::vector<LayerData>& layerData = layerPatternData.back ();
1170  //layerData.push_back (LayerData (numNodesPrev));
1171 
1172  if (itGradientBegin == itGradientEnd)
1173  {
1174  layerData.push_back (LayerData (layer.numNodes (), itWeight,
1175  layer.activationFunction (),
1176  layer.modeOutputValues ()));
1177  }
1178  else
1179  {
1180  layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
1181  layer.activationFunction (),
1182  layer.inverseActivationFunction (),
1183  layer.modeOutputValues ()));
1184  }
1185 
1186  if (usesDropOut)
1187  {
1188  layerData.back ().setDropOut (itDropOut);
1189  }
1190 
1191  }
1192 
1193  if (usesDropOut)
1194  {
1195  itDropOut += layer.numNodes ();
1196  }
1197  size_t _numWeights = layer.numWeights (numNodesPrev);
1198  totalNumWeights += _numWeights;
1199  itWeight += _numWeights;
1200  itGradient += _numWeights;
1201  numNodesPrev = layer.numNodes ();
1202  totalNumNodes += numNodesPrev;
1203 
1204  }
1205  assert (totalNumWeights > 0);
1206  return layerPatternData;
1207 }
1208 
1209 
1210 
1211  template <typename LayerContainer>
1212  void Net::forwardPattern (const LayerContainer& _layers,
1213  std::vector<LayerData>& layerData) const
1214  {
1215  size_t idxLayer = 0, idxLayerEnd = _layers.size ();
1216  size_t cumulativeNodeCount = 0;
1217  for (; idxLayer < idxLayerEnd; ++idxLayer)
1218  {
1219  LayerData& prevLayerData = layerData.at (idxLayer);
1220  LayerData& currLayerData = layerData.at (idxLayer+1);
1221 
1222  forward (prevLayerData, currLayerData);
1223 
1224  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1225  }
1226  }
1227 
1228 
1229 
1230 
1231  template <typename LayerContainer, typename LayerPatternContainer>
1232  void Net::forwardBatch (const LayerContainer& _layers,
1233  LayerPatternContainer& layerPatternData,
1234  std::vector<double>& valuesMean,
1235  std::vector<double>& valuesStdDev,
1236  size_t trainFromLayer) const
1237  {
1238  valuesMean.clear ();
1239  valuesStdDev.clear ();
1240 
1241  // ---------------------------------- loop over layers and pattern -------------------------------------------------------
1242  size_t cumulativeNodeCount = 0;
1243  for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
1244  {
1245  bool doTraining = idxLayer >= trainFromLayer;
1246 
1247  // get layer-pattern data for this and the corresponding one from the next layer
1248  std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
1249  std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);
1250 
1251  size_t numPattern = prevLayerPatternData.size ();
1252  size_t numNodesLayer = _layers.at (idxLayer).numNodes ();
1253 
1254  std::vector<MeanVariance> means (numNodesLayer);
1255  // ---------------- loop over layerDatas of pattern compute forward ----------------------------
1256  for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1257  {
1258  const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1259  LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1260 
1261 
1262  forward (prevLayerData, currLayerData); // feed forward
1263  }
1264 
1265  // ---------------- loop over layerDatas of pattern apply non-linearities ----------------------------
1266  for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1267  {
1268  //const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1269  LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1270 
1271  if (doTraining)
1272  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
1273  currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
1274  else
1275  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1276  }
1277 
1278  // accumulate node count
1279  cumulativeNodeCount += numNodesLayer;
1280  }
1281 }
1282 
1283 
1284 
1285 
1286  template <typename OutputContainer>
1287  void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
1288  {
1289  ModeOutputValues eModeOutput = lastLayerData.outputMode ();
1290  if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
1291  {
1292  outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
1293  }
1294  else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||
1295  isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))
1296  {
1297  const auto& prob = lastLayerData.probabilities ();
1298  outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ;
1299  }
1300  else
1301  assert (false);
1302  }
1303 
1304 
1305 
1306 
1307  template <typename OutputContainer>
1308  void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
1309  {
1310  for (const LayerData& lastLayerData : lastLayerPatternData)
1311  fetchOutput (lastLayerData, outputContainer);
1312  }
1313 
1314 
1315 
1316  template <typename ItWeight>
1317  std::tuple</*sumError*/double,/*sumWeights*/double> Net::computeError (const Settings& settings,
1318  std::vector<LayerData>& lastLayerData,
1319  Batch& batch,
1320  ItWeight itWeightBegin,
1321  ItWeight itWeightEnd) const
1322  {
1323  typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
1324  typename std::vector<LayerData>::iterator itLayerDataEnd = lastLayerData.end ();
1325 
1326  typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
1327  typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();
1328 
1329  double sumWeights (0.0);
1330  double sumError (0.0);
1331 
1332  size_t idxPattern = 0;
1333  for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
1334  {
1335  ++idxPattern;
1336 
1337  // compute E and the deltas of the computed output and the true output
1338  LayerData& layerData = (*itLayerData);
1339  const Pattern& _pattern = (*itPattern);
1340  double error = errorFunction (layerData, _pattern.output (),
1341  itWeightBegin, itWeightEnd,
1342  _pattern.weight (), settings.factorWeightDecay (),
1343  settings.regularization ());
1344  sumWeights += fabs (_pattern.weight ());
1345  sumError += error;
1346  }
1347  return std::make_tuple (sumError, sumWeights);
1348  }
1349 
1350 
1351 
1352  template <typename Settings>
1353  void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1354  const Settings& settings,
1355  size_t trainFromLayer,
1356  size_t totalNumWeights) const
1357  {
1358  bool doTraining = layerPatternData.size () > trainFromLayer;
1359  if (doTraining) // training
1360  {
1361  // ------------- backpropagation -------------
1362  size_t idxLayer = layerPatternData.size ();
1363  for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
1364  itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
1365  {
1366  --idxLayer;
1367  if (idxLayer <= trainFromLayer) // no training
1368  break;
1369 
1370  std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
1371  std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);
1372 
1373  size_t idxPattern = 0;
1374  for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
1375  itPrevLayerData = begin (prevLayerDataColl), itPrevLayerDataEnd = end (prevLayerDataColl);
1376  itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
1377  {
1378  LayerData& currLayerData = (*itCurrLayerData);
1379  LayerData& prevLayerData = *(itPrevLayerData);
1380 
1381  backward (prevLayerData, currLayerData);
1382 
1383  // the factorWeightDecay has to be scaled by 1/n, where n is the number of weights (synapses),
1384  // because of the L1 and L2 regularization terms; see
1385  //
1386  // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization
1387  //
1388  // L1 : -factorWeightDecay*sgn(w)/numWeights
1389  // L2 : -factorWeightDecay*w/numWeights
1390  update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
1391  }
1392  }
1393  }
1394  }
1395 
1396 
1397 
1398 /*! \brief forward propagation and backward propagation
1399  *
1400  *
1401  */
1402  template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1403  double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
1404  ItWeight itWeightBegin, ItWeight itWeightEnd,
1405  ItGradient itGradientBegin, ItGradient itGradientEnd,
1406  size_t trainFromLayer,
1407  OutContainer& outputContainer, bool doFetchOutput) const
1408  {
1409  Settings& settings = std::get<0>(settingsAndBatch);
1410  Batch& batch = std::get<1>(settingsAndBatch);
1411  DropContainer& dropContainer = std::get<2>(settingsAndBatch);
1412 
1413  double sumError = 0.0;
1414  double sumWeights = 0.0; // -------------
1415 
1416 
1417  // ----------------------------- prepare layer data -------------------------------------
1418  size_t totalNumWeights (0);
1419  std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
1420  batch,
1421  dropContainer,
1422  itWeightBegin,
1423  itWeightEnd,
1424  itGradientBegin,
1425  itGradientEnd,
1426  totalNumWeights);
1427 
1428 
1429 
1430  // ---------------------------------- propagate forward ------------------------------------------------------------------
1431  std::vector<double> valuesMean;
1432  std::vector<double> valuesStdDev;
1433  forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);
1434 
1435 
1436  // ------------- fetch output ------------------
1437  if (doFetchOutput)
1438  {
1439  fetchOutput (layerPatternData.back (), outputContainer);
1440  }
1441 
1442 
1443  // ------------- error computation -------------
1444  std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);
1445 
1446 
1447  // ------------- backpropagation -------------
1448  backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);
1449 
1450 
1451  // --- compile the measures
1452  double batchSize = std::distance (std::begin (batch), std::end (batch));
1453  for (auto it = itGradientBegin; it != itGradientEnd; ++it)
1454  (*it) /= batchSize;
1455 
1456 
1457  sumError /= sumWeights;
1458  return sumError;
1459  }
1460 
1461 
1462 
1463 /*! \brief initialization of the weights
1464  *
1465  *
1466  */
1467  template <typename OutIterator>
1468  void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
1469  {
1470  if (eInitStrategy == WeightInitializationStrategy::XAVIER)
1471  {
1472  // input and output properties
1473  int numInput = inputSize ();
1474 
1475  // compute variance and mean of input and output
1476  //...
1477 
1478 
1479  // compute the weights
1480  for (auto& layer: layers ())
1481  {
1482  double nIn = numInput;
1483  double stdDev = sqrt (2.0/nIn);
1484  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1485  {
1486  (*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU
1487  ++itWeight;
1488  }
1489  numInput = layer.numNodes ();
1490  }
1491  return;
1492  }
1493 
1494  if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
1495  {
1496  // input and output properties
1497  int numInput = inputSize ();
1498 
1499  // compute variance and mean of input and output
1500  //...
1501 
1502 
1503  // compute the weights
1504  for (auto& layer: layers ())
1505  {
1506  double nIn = numInput;
1507  double minVal = -sqrt(2.0/nIn);
1508  double maxVal = sqrt (2.0/nIn);
1509  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1510  {
1511 
1512  (*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU
1513  ++itWeight;
1514  }
1515  numInput = layer.numNodes ();
1516  }
1517  return;
1518  }
1519 
1520  if (eInitStrategy == WeightInitializationStrategy::TEST)
1521  {
1522  // input and output properties
1523  int numInput = inputSize ();
1524 
1525  // compute variance and mean of input and output
1526  //...
1527 
1528 
1529  // compute the weights
1530  for (auto& layer: layers ())
1531  {
1532 // double nIn = numInput;
1533  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1534  {
1535  (*itWeight) = DNN::gaussDouble (0.0, 0.1);
1536  ++itWeight;
1537  }
1538  numInput = layer.numNodes ();
1539  }
1540  return;
1541  }
1542 
1543  if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
1544  {
1545  // input and output properties
1546  int numInput = inputSize ();
1547 
1548  // compute variance and mean of input and output
1549  //...
1550 
1551 
1552  // compute the weights
1553  for (auto& layer: layers ())
1554  {
1555  double nIn = numInput;
1556  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1557  {
1558  (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU
1559  ++itWeight;
1560  }
1561  numInput = layer.numNodes ();
1562  }
1563  return;
1564  }
1565 
1566  }
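/* Worked example of the XAVIER variant used here (illustrative): a layer fed by nIn = 100 nodes
 * draws its weights from a Gaussian with stdDev = sqrt (2.0/100) (about 0.141); the
 * XAVIERUNIFORM variant draws uniformly from [-sqrt (2.0/nIn), +sqrt (2.0/nIn)].
 */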
1567 
1568 
1569 
1570 
1571 
1572 /*! \brief compute the error function
1573  *
1574  *
1575  */
1576  template <typename Container, typename ItWeight>
1577  double Net::errorFunction (LayerData& layerData,
1578  Container truth,
1579  ItWeight itWeight,
1580  ItWeight itWeightEnd,
1581  double patternWeight,
1582  double factorWeightDecay,
1583  EnumRegularization eRegularization) const
1584  {
1585  double error (0);
1586  switch (m_eErrorFunction)
1587  {
1588  case ModeErrorFunction::SUMOFSQUARES:
1589  {
1590  error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
1591  layerData.deltasBegin (), layerData.deltasEnd (),
1592  layerData.inverseActivationFunction (),
1593  patternWeight);
1594  break;
1595  }
1596  case ModeErrorFunction::CROSSENTROPY:
1597  {
1598  assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
1599  std::vector<double> probabilities = layerData.probabilities ();
1600  error = crossEntropy (begin (probabilities), end (probabilities),
1601  begin (truth), end (truth),
1602  layerData.deltasBegin (), layerData.deltasEnd (),
1603  layerData.inverseActivationFunction (),
1604  patternWeight);
1605  break;
1606  }
1607  case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
1608  {
1609  std::cout << "softmax." << std::endl;
1610  assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
1611  std::vector<double> probabilities = layerData.probabilities ();
1612  error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
1613  begin (truth), end (truth),
1614  layerData.deltasBegin (), layerData.deltasEnd (),
1615  layerData.inverseActivationFunction (),
1616  patternWeight);
1617  break;
1618  }
1619  }
1620  if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
1621  {
1622  error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
1623  }
1624  return error;
1625  }
1626 
1627 
1628 
1629 
1630 
1631 
1632 
1633 // /*! \brief pre-training
1634 // *
1635 // * in development
1636 // */
1637 // template <typename Minimizer>
1638 // void Net::preTrain (std::vector<double>& weights,
1639 // std::vector<Pattern>& trainPattern,
1640 // const std::vector<Pattern>& testPattern,
1641 // Minimizer& minimizer, Settings& settings)
1642 // {
1643 // auto itWeightGeneral = std::begin (weights);
1644 // std::vector<Pattern> prePatternTrain (trainPattern.size ());
1645 // std::vector<Pattern> prePatternTest (testPattern.size ());
1646 
1647 // size_t _inputSize = inputSize ();
1648 
1649 // // transform pattern using the created preNet
1650 // auto initializePrePattern = [&](const std::vector<Pattern>& pttrnInput, std::vector<Pattern>& pttrnOutput)
1651 // {
1652 // pttrnOutput.clear ();
1653 // std::transform (std::begin (pttrnInput), std::end (pttrnInput),
1654 // std::back_inserter (pttrnOutput),
1655 // [](const Pattern& p)
1656 // {
1657 // Pattern pat (p.input (), p.input (), p.weight ());
1658 // return pat;
1659 // });
1660 // };
1661 
1662 // initializePrePattern (trainPattern, prePatternTrain);
1663 // initializePrePattern (testPattern, prePatternTest);
1664 
1665 // std::vector<double> originalDropFractions = settings.dropFractions ();
1666 
1667 // for (auto& _layer : layers ())
1668 // {
1669 // // compute number of weights (as a function of the number of incoming nodes)
1670 // // fetch number of nodes
1671 // size_t numNodes = _layer.numNodes ();
1672 // size_t _numWeights = _layer.numWeights (_inputSize);
1673 
1674 // // ------------------
1675 // DNN::Net preNet;
1676 // if (!originalDropFractions.empty ())
1677 // {
1678 // originalDropFractions.erase (originalDropFractions.begin ());
1679 // settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ());
1680 // }
1681 // std::vector<double> preWeights;
1682 
1683 // // define the preNet (pretraining-net) for this layer
1684 // // outputSize == inputSize, because this is an autoencoder;
1685 // preNet.setInputSize (_inputSize);
1686 // preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ()));
1687 // preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT));
1688 // preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES);
1689 // preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder)
1690 
1691 // // initialize weights
1692 // preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM,
1693 // std::back_inserter (preWeights));
1694 
1695 // // overwrite already existing weights from the "general" weights
1696 // std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ());
1697 // std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer
1698 
1699 
1700 // // train the "preNet"
1701 // preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings);
1702 
1703 // // fetch the pre-trained weights (without the output part of the autoencoder)
1704 // std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral);
1705 
1706 // // advance the iterator on the incoming weights
1707 // itWeightGeneral += _numWeights;
1708 
1709 // // remove the weights of the output layer of the preNet
1710 // preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ());
1711 
1712 // // remove the outputLayer of the preNet
1713 // preNet.removeLayer ();
1714 
1715 // // set the output size to the number of nodes in the new output layer (== last hidden layer)
1716 // preNet.setOutputSize (numNodes);
1717 
1718 // // transform pattern using the created preNet
1719 // auto proceedPattern = [&](std::vector<Pattern>& pttrn)
1720 // {
1721 // std::vector<Pattern> newPttrn;
1722 // std::for_each (std::begin (pttrn), std::end (pttrn),
1723 // [&preNet,&preWeights,&newPttrn](Pattern& p)
1724 // {
1725 // std::vector<double> output = preNet.compute (p.input (), preWeights);
1726 // Pattern pat (output, output, p.weight ());
1727 // newPttrn.push_back (pat);
1728 // // p = pat;
1729 // });
1730 // return newPttrn;
1731 // };
1732 
1733 
1734 // prePatternTrain = proceedPattern (prePatternTrain);
1735 // prePatternTest = proceedPattern (prePatternTest);
1736 
1737 
1738 // // the new input size is the output size of the already reduced preNet
1739 // _inputSize = preNet.layers ().back ().numNodes ();
1740 // }
1741 // }
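// --- editor's note (illustrative, not part of the original NeuralNet.icc) -----
// The commented-out preTrain above implements greedy layer-wise autoencoder
// pre-training: for each layer an auxiliary net (the layer itself plus a linear
// output layer of the original input size) is trained with a sum-of-squares
// error to reproduce its own input, the learned hidden weights are copied back
// into the overall weight vector, the temporary output layer is discarded, and
// the training/test patterns are pushed through the trained layer to form the
// input of the next stage.
// -------------------------------------------------------------------------------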
1742 
1743 
1744 
1745 
1746 
1747 
1748 
1749 
1750 
1751 
1752 
1753 
1754 
1755 
1756 
1757 
1758  } // namespace DNN
1759 } // namespace TMVA
1760 
1761 #endif