NeuralNet.icc
1 #ifndef TMVA_NEURAL_NET_I
2 #define TMVA_NEURAL_NET_I
3 
4 #ifndef TMVA_NEURAL_NET
5 #error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
6 #endif // TMVA_NEURAL_NET
7 #pragma once
8 #pragma GCC diagnostic ignored "-Wunused-variable"
9 
10 #include "Math/Util.h"
11 
12 #include "TMVA/Pattern.h"
13 #include "TMVA/MethodBase.h"
14 
15 #include <tuple>
16 #include <future>
17 #include <random>
18 
19 namespace TMVA
20 {
21  namespace DNN
22  {
23 
24 
25 
26 
27 
28 
29 
30 
31  template <typename T>
32  T uniformFromTo (T from, T to)
33  {
34  return from + (rand ()* (to - from)/RAND_MAX);
35  }
36 
37 
38 
39  template <typename Container, typename T>
40  void uniformDouble (Container& container, T maxValue)
41  {
42  for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
43  {
44 // (*it) = uniformFromTo (-1.0*maxValue, 1.0*maxValue);
45  (*it) = TMVA::DNN::uniformFromTo (-1.0*maxValue, 1.0*maxValue);
46  }
47  }
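// Minimal usage sketch for the two helpers above: fill a container with uniform
// random values in [-maxValue, +maxValue].
//
//   std::vector<double> weights (10);
//   TMVA::DNN::uniformDouble (weights, 0.1);   // each entry ends up in [-0.1, 0.1]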
48 
49 
50  static std::shared_ptr<std::function<double(double)>> ZeroFnc = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 0; });
51 
52 
53  static std::shared_ptr<std::function<double(double)>> Sigmoid = std::make_shared<std::function<double(double)>> ([](double value){ value = std::max (-100.0, std::min (100.0,value)); return 1.0/(1.0 + std::exp (-value)); });
54  static std::shared_ptr<std::function<double(double)>> InvSigmoid = std::make_shared<std::function<double(double)>> ([](double value){ double s = (*Sigmoid.get ()) (value); return s*(1.0-s); });
55 
56  static std::shared_ptr<std::function<double(double)>> Tanh = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value); });
57  static std::shared_ptr<std::function<double(double)>> InvTanh = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 - std::pow (value, 2.0); });
58 
59  static std::shared_ptr<std::function<double(double)>> Linear = std::make_shared<std::function<double(double)>> ([](double value){ return value; });
60  static std::shared_ptr<std::function<double(double)>> InvLinear = std::make_shared<std::function<double(double)>> ([](double /*value*/){ return 1.0; });
61 
62  static std::shared_ptr<std::function<double(double)>> SymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? value-margin : value < -margin ? value+margin : 0; });
63  static std::shared_ptr<std::function<double(double)>> InvSymmReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.3; return value > margin ? 1.0 : value < -margin ? 1.0 : 0; });
64 
65  static std::shared_ptr<std::function<double(double)>> ReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? value-margin : 0; });
66  static std::shared_ptr<std::function<double(double)>> InvReLU = std::make_shared<std::function<double(double)>> ([](double value){ const double margin = 0.0; return value > margin ? 1.0 : 0; });
67 
68  static std::shared_ptr<std::function<double(double)>> SoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return std::log (1.0+ std::exp (value)); });
69  static std::shared_ptr<std::function<double(double)>> InvSoftPlus = std::make_shared<std::function<double(double)>> ([](double value){ return 1.0 / (1.0 + std::exp (-value)); });
70 
71  static std::shared_ptr<std::function<double(double)>> TanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return tanh (value-0.3); });
72  static std::shared_ptr<std::function<double(double)>> InvTanhShift = std::make_shared<std::function<double(double)>> ([](double value){ return 0.3 + (1.0 - std::pow (value, 2.0)); });
73 
74  static std::shared_ptr<std::function<double(double)>> SoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return value / (1.0 + fabs (value)); });
75  static std::shared_ptr<std::function<double(double)>> InvSoftSign = std::make_shared<std::function<double(double)>> ([](double value){ return std::pow ((1.0 - fabs (value)),2.0); });
76 
77  static std::shared_ptr<std::function<double(double)>> Gauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return exp (-std::pow(value*s,2.0)); });
78  static std::shared_ptr<std::function<double(double)>> InvGauss = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return -2.0 * value * s*s * (*Gauss.get ()) (value); });
79 
80  static std::shared_ptr<std::function<double(double)>> GaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return 1.0 - exp (-std::pow(value*s,2.0)); });
81  static std::shared_ptr<std::function<double(double)>> InvGaussComplement = std::make_shared<std::function<double(double)>> ([](double value){ const double s = 6.0; return +2.0 * value * s*s * (*GaussComplement.get ()) (value); });
82 
83 
84 
85 /*! \brief apply the weights, optionally using drop-out; if no drop-out is used, pass the address of a bool set to true as itDrop so that *itDrop evaluates to "true"
86  *
87  * itDrop advances in step with itSourceBegin (one drop-out flag per source node)
88  */
89 template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
90  void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
91  ItWeight itWeight,
92  ItTarget itTargetBegin, ItTarget itTargetEnd,
93  ItDrop itDrop)
94  {
95  for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
96  {
97  for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
98  {
99  if (!HasDropOut || *itDrop)
100  (*itTarget) += (*itSource) * (*itWeight);
101  ++itWeight;
102  }
103  if (HasDropOut) ++itDrop;
104  }
105  }
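// Minimal usage sketch, without drop-out: the weight iterator is advanced target-by-target
// for each source node, so a layer with 2 sources and 3 targets consumes 6 weights.
//
//   std::vector<double> source  {1.0, 2.0};
//   std::vector<double> weights {0.1, 0.2, 0.3,    // weights from source[0] to the 3 targets
//                                0.4, 0.5, 0.6};   // weights from source[1] to the 3 targets
//   std::vector<double> target  (3, 0.0);
//   bool noDrop = true;                            // dummy, as in "forward" further below
//   applyWeights<false> (begin (source), end (source), begin (weights),
//                        begin (target), end (target), &noDrop);
//   // target == {0.9, 1.2, 1.5}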
106 
107 
108 
109 
110 
111 
112 /*! \brief apply the weights backwards (for back-propagation); if no drop-out is used, pass the address of a bool set to true as itDrop so that *itDrop evaluates to "true"
113  *
114  * itDrop advances in step with itPrev, in agreement with "applyWeights", where it advances with itSource (the same nodes that are the accumulation targets, itPrev, here in applyWeightsBackwards)
115  */
116 template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
117  void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
118  ItWeight itWeight,
119  ItPrev itPrevBegin, ItPrev itPrevEnd,
120  ItDrop itDrop)
121  {
122  for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
123  {
124  for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
125  {
126  if (!HasDropOut || *itDrop)
127  (*itPrev) += (*itCurr) * (*itWeight);
128  ++itWeight;
129  }
130  if (HasDropOut) ++itDrop;
131  }
132  }
133 
134 
135 
136 
137 
138 
139 
140 /*! \brief apply the activation functions
141  *
142  *
143  */
144 
145  template <typename ItValue, typename Fnc>
146  void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
147  {
148  while (itValue != itValueEnd)
149  {
150  auto& value = (*itValue);
151  value = (*fnc.get ()) (value);
152 
153  ++itValue;
154  }
155  }
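// Minimal usage sketch: apply the Sigmoid activation defined above in place to a
// container of node values.
//
//   std::vector<double> values {-1.0, 0.0, 1.0};
//   applyFunctions (begin (values), end (values), Sigmoid);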
156 
157 
158 /*! \brief apply the activation functions and compute the gradient
159  *
160  *
161  */
162  template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
163  void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
164  {
165  while (itValue != itValueEnd)
166  {
167  auto& value = (*itValue);
168  value = (*fnc.get ()) (value);
169  (*itGradient) = (*invFnc.get ()) (value);
170 
171  ++itValue; ++itGradient;
172  }
173  }
174 
175 
176 
177 /*! \brief update the gradients
178  *
179  *
180  */
181  template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
182  void update (ItSource itSource, ItSource itSourceEnd,
183  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
184  ItTargetGradient itTargetGradientBegin,
185  ItGradient itGradient)
186  {
187  while (itSource != itSourceEnd)
188  {
189  auto itTargetDelta = itTargetDeltaBegin;
190  auto itTargetGradient = itTargetGradientBegin;
191  while (itTargetDelta != itTargetDeltaEnd)
192  {
193  (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
194  ++itTargetDelta; ++itTargetGradient; ++itGradient;
195  }
196  ++itSource;
197  }
198  }
199 
200 
201 
202 
203 /*! \brief compute the regularization (L1, L2)
204  *
205  *
206  */
207  template <EnumRegularization Regularization>
208  inline double computeRegularization (double weight, const double& factorWeightDecay)
209  {
210  MATH_UNUSED(weight);
211  MATH_UNUSED(factorWeightDecay);
212 
213  return 0;
214  }
215 
216 // L1 regularization
217  template <>
218  inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
219  {
220  return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
221  }
222 
223 // L2 regularization
224  template <>
225  inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
226  {
227  return factorWeightDecay * weight;
228  }
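// Sketch of the gradient contributions the two specializations add in the regularized
// "update" below (factorWeightDecay is assumed to be already divided by the number of weights):
//   L1: d/dw (factorWeightDecay * |w|)       = factorWeightDecay * sgn(w)   (0 at w == 0)
//   L2: d/dw (0.5 * factorWeightDecay * w*w) = factorWeightDecay * w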
229 
230 
231 /*! \brief update the gradients, using regularization
232  *
233  *
234  */
235  template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
236  void update (ItSource itSource, ItSource itSourceEnd,
237  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
238  ItTargetGradient itTargetGradientBegin,
239  ItGradient itGradient,
240  ItWeight itWeight, double weightDecay)
241  {
242  // ! the factor weightDecay must already be scaled by 1/n, where n is the number of weights
243  while (itSource != itSourceEnd)
244  {
245  auto itTargetDelta = itTargetDeltaBegin;
246  auto itTargetGradient = itTargetGradientBegin;
247  while (itTargetDelta != itTargetDeltaEnd)
248  {
249  (*itGradient) -= + (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight,weightDecay);
250  ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
251  }
252  ++itSource;
253  }
254  }
255 
256 
257 
258 
259 
260 
261 #define USELOCALWEIGHTS 1
262 
263 
264 
265 /*! \brief implementation of the steepest gradient descent algorithm
266  *
267  * Can be used with multithreading (i.e. "HogWild!" style); see call in trainCycle
268  */
269  template <typename Function, typename Weights, typename PassThrough>
270  double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
271  {
272  size_t numWeights = weights.size ();
273  // std::vector<double> gradients (numWeights, 0.0);
274  m_localGradients.assign (numWeights, 0.0);
275  // std::vector<double> localWeights (begin (weights), end (weights));
276  // m_localWeights.reserve (numWeights);
277  m_localWeights.assign (begin (weights), end (weights));
278 
279  double E = 1e10;
280  if (m_prevGradients.size () != numWeights)
281  {
282  m_prevGradients.clear ();
283  m_prevGradients.assign (weights.size (), 0);
284  }
285 
286  bool success = true;
287  size_t currentRepetition = 0;
288  while (success)
289  {
290  if (currentRepetition >= m_repetitions)
291  break;
292 
293  m_localGradients.assign (numWeights, 0.0);
294 
295  // --- nesterov momentum ---
296  // apply momentum before computing the new gradient
297  auto itPrevG = begin (m_prevGradients);
298  auto itPrevGEnd = end (m_prevGradients);
299  auto itLocWeight = begin (m_localWeights);
300  for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
301  {
302  (*itPrevG) *= m_beta;
303  (*itLocWeight) += (*itPrevG);
304  }
305 
306  E = fitnessFunction (passThrough, m_localWeights, m_localGradients);
307 // plotGradients (gradients);
308 // plotWeights (localWeights);
309 
310  double alpha = gaussDouble (m_alpha, m_alpha/2.0);
311 // double alpha = m_alpha;
312 
313  auto itG = begin (m_localGradients);
314  auto itGEnd = end (m_localGradients);
315  itPrevG = begin (m_prevGradients);
316  double maxGrad = 0.0;
317  for (; itG != itGEnd; ++itG, ++itPrevG)
318  {
319  double currGrad = (*itG);
320  double prevGrad = (*itPrevG);
321  currGrad *= alpha;
322 
323  //(*itPrevG) = m_beta * (prevGrad + currGrad);
324  currGrad += prevGrad;
325  (*itG) = currGrad;
326  (*itPrevG) = currGrad;
327 
328  if (std::fabs (currGrad) > maxGrad)
329  maxGrad = currGrad;
330  }
331 
332  if (maxGrad > 1)
333  {
334  m_alpha /= 2;
335  std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
336  std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
337  {
338  w /= maxGrad;
339  });
340  m_prevGradients.clear ();
341  }
342  else
343  {
344  auto itW = std::begin (weights);
345  std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
346  {
347  *itW += g;
348  ++itW;
349  });
350  }
351 
352  ++currentRepetition;
353  }
354  return E;
355  }
356 
357 
358 
359 
360 
361 
362 
363 
364 
365 
366 
367 
368 
369 
370 
371 
372 
373 
374 
375 
376 /*! \brief sum of squares error function
377  *
378  *
379  */
380  template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
381  double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
382  {
383  double errorSum = 0.0;
384 
385  // output - truth
386  ItTruth itTruth = itTruthBegin;
387  bool hasDeltas = (itDelta != itDeltaEnd);
388  for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
389  {
390 // assert (itTruth != itTruthEnd);
391  double output = (*itOutput);
392  double error = output - (*itTruth);
393  if (hasDeltas)
394  {
395  (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
396  ++itDelta;
397  }
398  errorSum += error*error * patternWeight;
399  }
400 
401  return 0.5*errorSum;
402  }
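// Minimal sketch: for output {0.8, 0.2}, truth {1.0, 0.0} and patternWeight 1.0 the
// return value is 0.5 * ((0.8-1.0)^2 + (0.2-0.0)^2) = 0.04; if a delta range is given,
// each delta becomes invFnc(output) * (output - truth) * patternWeight.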
403 
404 
405 
406 /*! \brief cross entropy error function
407  *
408  *
409  */
410  template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
411  double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
412  {
413  bool hasDeltas = (itDelta != itDeltaEnd);
414 
415  double errorSum = 0.0;
416  for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability)
417  {
418  double probability = *itProbability;
419  double truth = *itTruthBegin;
420  /* truth = truth < 0.1 ? 0.1 : truth; */
421  /* truth = truth > 0.9 ? 0.9 : truth; */
422  truth = truth < 0.5 ? 0.1 : 0.9;
423  if (hasDeltas)
424  {
425  double delta = probability - truth;
426  (*itDelta) = delta*patternWeight;
427 // (*itDelta) = (*itInvActFnc)(probability) * delta * patternWeight;
428  ++itDelta;
429  }
430  double error (0);
431  if (probability == 0) // protection against log (0)
432  {
433  if (truth >= 0.5)
434  error += 1.0;
435  }
436  else if (probability == 1)
437  {
438  if (truth < 0.5)
439  error += 1.0;
440  }
441  else
442  error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability)); // cross entropy function
443  errorSum += error * patternWeight;
444 
445  }
446  return errorSum;
447  }
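// Minimal sketch: for a single output with probability 0.8, truth 1 (clamped to 0.9
// above) and patternWeight 1.0, the delta is 0.8 - 0.9 = -0.1 and the error is
// -(0.9*log(0.8) + 0.1*log(0.2)) ~ 0.36.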
448 
449 
450 
451 
452 /*! \brief softmax cross-entropy error function (for mutually exclusive classes)
453  *
454  *
455  */
456  template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
457  double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
458  {
459  double errorSum = 0.0;
460 
461  bool hasDeltas = (itDelta != itDeltaEnd);
462  // output - truth
463  ItTruth itTruth = itTruthBegin;
464  for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
465  {
466 // assert (itTruth != itTruthEnd);
467  double probability = (*itProbability);
468  double truth = (*itTruth);
469  if (hasDeltas)
470  {
471  (*itDelta) = probability - truth;
472 // (*itDelta) = (*itInvActFnc)(sm) * delta * patternWeight;
473  ++itDelta; //++itInvActFnc;
474  }
475  double error (0);
476 
477  error += truth * log (probability);
478  errorSum += error;
479  }
480 
481  return -errorSum * patternWeight;
482  }
483 
484 
485 
486 
487 
488 
489 
490 
491 
492 /*! \brief compute the weight decay for regularization (L1 or L2)
493  *
494  *
495  */
496  template <typename ItWeight>
497  double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
498  {
499  if (eRegularization == EnumRegularization::L1)
500  {
501  // weight decay (regularization)
502  double w = 0;
503  size_t n = 0;
504  for (; itWeight != itWeightEnd; ++itWeight, ++n)
505  {
506  double weight = (*itWeight);
507  w += std::fabs (weight);
508  }
509  return error + 0.5 * w * factorWeightDecay / n;
510  }
511  else if (eRegularization == EnumRegularization::L2)
512  {
513  // weight decay (regularization)
514  double w = 0;
515  size_t n = 0;
516  for (; itWeight != itWeightEnd; ++itWeight, ++n)
517  {
518  double weight = (*itWeight);
519  w += weight*weight;
520  }
521  return error + 0.5 * w * factorWeightDecay / n;
522  }
523  else
524  return error;
525  }
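// Minimal sketch: for weights {0.5, -0.5} and factorWeightDecay 0.1 the function returns
//   L1: error + 0.5 * (|0.5| + |-0.5|)     * 0.1 / 2 = error + 0.025
//   L2: error + 0.5 * (0.5*0.5 + 0.5*0.5)  * 0.1 / 2 = error + 0.0125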
526 
527 
528 
529 
530 
531 
532 
533 
534 
535 
536 
537 
538 
539 
540 /*! \brief apply the weights (and functions) in forward direction of the DNN
541  *
542  *
543  */
544  template <typename LAYERDATA>
545  void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
546  {
547  if (prevLayerData.hasDropOut ())
548  {
549  applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
550  currLayerData.weightsBegin (),
551  currLayerData.valuesBegin (), currLayerData.valuesEnd (),
552  prevLayerData.dropOut ());
553  }
554  else
555  {
556  bool dummy = true;
557  applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
558  currLayerData.weightsBegin (),
559  currLayerData.valuesBegin (), currLayerData.valuesEnd (),
560  &dummy); // dummy to turn on all nodes (no drop out)
561  }
562  }
563 
564 
565 
566 /*! \brief backward application of the weights (back-propagation of the error)
567  *
568  *
569  */
570 template <typename LAYERDATA>
571  void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
572 {
573  if (prevLayerData.hasDropOut ())
574  {
575  applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
576  currLayerData.weightsBegin (),
577  prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
578  prevLayerData.dropOut ());
579  }
580  else
581  {
582  bool dummy = true;
583  applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
584  currLayerData.weightsBegin (),
585  prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
586  &dummy); // dummy to use all nodes (no drop out)
587  }
588 }
589 
590 
591 
592 
593 
594 /*! \brief update the gradients of the weights between the previous and the current layer
595  *
596  *
597  */
598  template <typename LAYERDATA>
599  void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
600  {
601  // ! the "factorWeightDecay" must already be scaled by 1/n, where n is the number of weights
602  if (factorWeightDecay != 0.0) // has weight regularization
603  if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) )
604  {
605  update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
606  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
607  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
608  currLayerData.weightsBegin (), factorWeightDecay);
609  }
610  else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) )
611  {
612  update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
613  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
614  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
615  currLayerData.weightsBegin (), factorWeightDecay);
616  }
617  else
618  {
619  update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
620  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
621  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
622  }
623 
624  else
625  { // no weight regularization
626  update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
627  currLayerData.deltasBegin (), currLayerData.deltasEnd (),
628  currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
629  }
630  }
631 
632 
633 
634 
635 
636 
637 
638 
639 
640 
641 
642 
643 /*! \brief compute the drop-out-weight factor
644  *
645  * When drop-out is used, a fraction of the nodes is turned off during each cycle of the computation.
646  * Once all nodes are turned on again (for instance when the test samples are evaluated),
647  * the weights have to be adjusted to account for the different number of active nodes.
648  * This function computes that factor and applies it to the weights.
649  */
650  template <typename WeightsType, typename DropProbabilities>
651  void Net::dropOutWeightFactor (WeightsType& weights,
652  const DropProbabilities& drops,
653  bool inverse)
654  {
655  if (drops.empty () || weights.empty ())
656  return;
657 
658  auto itWeight = std::begin (weights);
659  auto itWeightEnd = std::end (weights);
660  auto itDrop = std::begin (drops);
661  auto itDropEnd = std::end (drops);
662  size_t numNodesPrev = inputSize ();
663  double dropFractionPrev = *itDrop;
664  ++itDrop;
665 
666  for (auto& layer : layers ())
667  {
668  if (itDrop == itDropEnd)
669  break;
670 
671  size_t _numNodes = layer.numNodes ();
672 
673  double dropFraction = *itDrop;
674  double pPrev = 1.0 - dropFractionPrev;
675  double p = 1.0 - dropFraction;
676  p *= pPrev;
677 
678  if (inverse)
679  {
680  p = 1.0/p;
681  }
682  size_t _numWeights = layer.numWeights (numNodesPrev);
683  for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
684  {
685  if (itWeight == itWeightEnd)
686  break;
687 
688  *itWeight *= p;
689  ++itWeight;
690  }
691  numNodesPrev = _numNodes;
692  dropFractionPrev = dropFraction;
693  ++itDrop;
694  }
695  }
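// Minimal sketch: with drop fractions {0.0, 0.5} the weights feeding the first hidden
// layer are scaled by (1-0.0)*(1-0.5) = 0.5 when all nodes are switched back on
// (e.g. for testing), and by 1/0.5 = 2 when switching back to drop-out mode (inverse == true).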
696 
697 
698 
699 
700 
701 
702 /*! \brief execute the training until convergence is reached
703  *
704  * \param weights the container with the weights (synapses)
705  * \param trainPattern the patterns used for the training
706  * \param testPattern the patterns used for the testing
707  * \param minimizer the minimizer (e.g. steepest gradient descent) to be used
708  * \param settings the settings for the training (e.g. multithreading or not, regularization etc.)
709  */
710  template <typename Minimizer>
711  double Net::train (std::vector<double>& weights,
712  std::vector<Pattern>& trainPattern,
713  const std::vector<Pattern>& testPattern,
714  Minimizer& minimizer,
715  Settings& settings)
716  {
717 // std::cout << "START TRAINING" << std::endl;
718  settings.startTrainCycle ();
719 
720  // JsMVA progress bar maximum (100%)
721  if (fIPyMaxIter) *fIPyMaxIter = 100;
722 
723  settings.pads (4);
724  settings.create ("trainErrors", 100, 0, 100, 100, 0,1);
725  settings.create ("testErrors", 100, 0, 100, 100, 0,1);
726 
727  size_t cycleCount = 0;
728  size_t testCycleCount = 0;
729  double testError = 1e20;
730  double trainError = 1e20;
731  size_t dropOutChangeCount = 0;
732 
733  DropContainer dropContainer;
734  DropContainer dropContainerTest;
735  const std::vector<double>& dropFractions = settings.dropFractions ();
736  bool isWeightsForDrop = false;
737 
738 
739  // until convergence
740  do
741  {
742  ++cycleCount;
743 
744  // if dropOut enabled
745  size_t dropIndex = 0;
746  if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
747  {
748  // fill the dropOut-container
749  dropContainer.clear ();
750  size_t _numNodes = inputSize ();
751  double dropFraction = 0.0;
752  dropFraction = dropFractions.at (dropIndex);
753  ++dropIndex;
754  fillDropContainer (dropContainer, dropFraction, _numNodes);
755  for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
756  {
757  auto& layer = *itLayer;
758  _numNodes = layer.numNodes ();
759  // how many nodes have to be dropped
760  dropFraction = 0.0;
761  if (dropFractions.size () > dropIndex)
762  dropFraction = dropFractions.at (dropIndex);
763 
764  fillDropContainer (dropContainer, dropFraction, _numNodes);
765  }
766  isWeightsForDrop = true;
767  }
768 
769  // execute training cycle
770  trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);
771 
772 
773  // ------ check if we have to execute a test ------------------
774  bool hasConverged = false;
775  if (testCycleCount % settings.testRepetitions () == 0) // we test only every "testRepetitions" repetition
776  {
777  if (isWeightsForDrop)
778  {
779  dropOutWeightFactor (weights, dropFractions);
780  isWeightsForDrop = false;
781  }
782 
783 
784  testError = 0;
785  //double weightSum = 0;
786  settings.startTestCycle ();
787  if (settings.useMultithreading ())
788  {
789  size_t numThreads = std::thread::hardware_concurrency ();
790  size_t patternPerThread = testPattern.size () / numThreads;
791  std::vector<Batch> batches;
792  auto itPat = testPattern.begin ();
793  // auto itPatEnd = testPattern.end ();
794  for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
795  {
796  batches.push_back (Batch (itPat, itPat + patternPerThread));
797  itPat += patternPerThread;
798  }
799  if (itPat != testPattern.end ())
800  batches.push_back (Batch (itPat, testPattern.end ()));
801 
802  std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
803  for (auto& batch : batches)
804  {
805  // -------------------- execute each of the batch ranges on a different thread -------------------------------
806  futures.push_back (
807  std::async (std::launch::async, [&]()
808  {
809  std::vector<double> localOutput;
810  pass_through_type passThrough (settings, batch, dropContainerTest);
811  double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
812  return std::make_tuple (testBatchError, localOutput);
813  })
814  );
815  }
816 
817  auto itBatch = batches.begin ();
818  for (auto& f : futures)
819  {
820  std::tuple<double,std::vector<double>> result = f.get ();
821  testError += std::get<0>(result) / batches.size ();
822  std::vector<double> output = std::get<1>(result);
823  if (output.size() == (outputSize() - 1) * itBatch->size())
824  {
825  auto output_iterator = output.begin();
826  for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
827  {
828  for (size_t output_index = 1; output_index < outputSize(); ++output_index)
829  {
830  settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
831  (*pattern_it).weight ());
832  ++output_iterator;
833  }
834  }
835  }
836  ++itBatch;
837  }
838 
839  }
840  else
841  {
842  std::vector<double> output;
843  //for (auto it = begin (testPattern), itEnd = end (testPattern); it != itEnd; ++it)
844  {
845  //const Pattern& p = (*it);
846  //double weight = p.weight ();
847  //Batch batch (it, it+1);
848  Batch batch (begin (testPattern), end (testPattern));
849  output.clear ();
850  pass_through_type passThrough (settings, batch, dropContainerTest);
851  double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);
852  if (output.size() == (outputSize() - 1) * batch.size())
853  {
854  auto output_iterator = output.begin();
855  for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
856  {
857  for (size_t output_index = 1; output_index < outputSize(); ++output_index)
858  {
859  settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
860  (*pattern_it).weight ());
861  ++output_iterator;
862  }
863  }
864  }
865  testError += testPatternError; /// batch.size ();
866  }
867  // testError /= testPattern.size ();
868  }
869  settings.endTestCycle ();
870 // testError /= weightSum;
871 
872  settings.computeResult (*this, weights);
873 
874  hasConverged = settings.hasConverged (testError);
875  if (!hasConverged && !isWeightsForDrop)
876  {
877  dropOutWeightFactor (weights, dropFractions, true); // inverse
878  isWeightsForDrop = true;
879  }
880  }
881  ++testCycleCount;
882  ++dropOutChangeCount;
883 
884 
885  static double x = -1.0;
886  x += 1.0;
887 // settings.resetPlot ("errors");
888  settings.addPoint ("trainErrors", cycleCount, trainError);
889  settings.addPoint ("testErrors", cycleCount, testError);
890  settings.plot ("trainErrors", "C", 1, kBlue);
891  settings.plot ("testErrors", "C", 1, kMagenta);
892 
893 
894  // setup error plots and progress bar variables for JsMVA
895  if (fInteractive){
896  fInteractive->AddPoint(cycleCount, trainError, testError);
897  if (*fExitFromTraining) break;
898  *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
899  }
900 
901  if (hasConverged)
902  break;
903 
904  if ((int)cycleCount % 10 == 0) {
905 
906  TString convText = Form( "(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
907  trainError,
908  testError,
909  (int)cycleCount,
910  (int)settings.convergenceCount (),
911  (int)settings.maxConvergenceCount ());
912  double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
913  settings.cycle (progress, convText);
914  }
915  }
916  while (true);
917  settings.endTrainCycle (trainError);
918 
919  TString convText = Form( "(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
920  double progress = 100*(double)settings.maxConvergenceCount() /(double)settings.convergenceSteps ();
921  settings.cycle (progress, convText);
922 
923  return testError;
924  }
925 
926 
927 
928 /*! \brief execute a single training cycle
929  *
930  * uses multithreading if turned on
931  *
932  * \param minimizer the minimizer to be used (e.g. SGD)
933  * \param weights the weight container with all the synapse weights
934  * \param itPatternBegin begin of the pattern container
935  * \param itPatternEnd the end of the pattern container
936  * \param settings the settings for this training (e.g. multithreading or not, regularization, etc.)
937  * \param dropContainer the data for dropping-out nodes (regularization technique)
938  */
939  template <typename Iterator, typename Minimizer>
940  inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
941  Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
942  {
943  double error = 0.0;
944  size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
945  size_t numBatches = numPattern/settings.batchSize ();
946  size_t numBatches_stored = numBatches;
947 
948  std::shuffle(itPatternBegin, itPatternEnd, std::default_random_engine{});
949  Iterator itPatternBatchBegin = itPatternBegin;
950  Iterator itPatternBatchEnd = itPatternBatchBegin;
951 
952  // create batches
953  std::vector<Batch> batches;
954  while (numBatches > 0)
955  {
956  std::advance (itPatternBatchEnd, settings.batchSize ());
957  batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
958  itPatternBatchBegin = itPatternBatchEnd;
959  --numBatches;
960  }
961 
962  // add the last pattern to the last batch
963  if (itPatternBatchEnd != itPatternEnd)
964  batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));
965 
966 
967  // use multithreading if requested
968  if (settings.useMultithreading ())
969  {
970  // -------------------- divide the batches into bunches for each thread --------------
971  size_t numThreads = std::thread::hardware_concurrency ();
972  size_t batchesPerThread = batches.size () / numThreads;
973  typedef std::vector<Batch>::iterator batch_iterator;
974  std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
975  batch_iterator itBatchBegin = std::begin (batches);
976  batch_iterator itBatchCurrEnd = std::begin (batches);
977  batch_iterator itBatchEnd = std::end (batches);
978  for (size_t iT = 0; iT < numThreads; ++iT)
979  {
980  if (iT == numThreads-1)
981  itBatchCurrEnd = itBatchEnd;
982  else
983  std::advance (itBatchCurrEnd, batchesPerThread);
984  batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
985  itBatchBegin = itBatchCurrEnd;
986  }
987 
988  // -------------------- loop over batches -------------------------------------------
989  std::vector<std::future<double>> futures;
990  for (auto& batchRange : batchVec)
991  {
992  // -------------------- execute each of the batch ranges on a different thread -------------------------------
993  futures.push_back (
994  std::async (std::launch::async, [&]()
995  {
996  double localError = 0.0;
997  for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
998  {
999  Batch& batch = *it;
1000  pass_through_type settingsAndBatch (settings, batch, dropContainer);
1001  Minimizer minimizerClone (minimizer);
1002  localError += minimizerClone ((*this), weights, settingsAndBatch); /// call the minimizer
1003  }
1004  return localError;
1005  })
1006  );
1007  }
1008 
1009  for (auto& f : futures)
1010  error += f.get ();
1011  }
1012  else
1013  {
1014  for (auto& batch : batches)
1015  {
1016  std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
1017  error += minimizer ((*this), weights, settingsAndBatch);
1018  }
1019  }
1020 
1021  numBatches_stored = std::max (numBatches_stored, size_t(1)); /// normalize the error
1022  error /= numBatches_stored;
1023  settings.testIteration ();
1024 
1025  return error;
1026  }
1027 
1028 
1029 
1030 
1031 
1032 /*! \brief compute the neural net
1033  *
1034  * \param input the input data
1035  * \param weights the weight data
1036  */
1037  template <typename Weights>
1038  std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
1039  {
1040  std::vector<LayerData> layerData;
1041  layerData.reserve (m_layers.size ()+1);
1042  auto itWeight = begin (weights);
1043  auto itInputBegin = begin (input);
1044  auto itInputEnd = end (input);
1045  layerData.push_back (LayerData (itInputBegin, itInputEnd));
1046  size_t numNodesPrev = input.size ();
1047 
1048  // -------------------- prepare layer data with one pattern -------------------------------
1049  for (auto& layer: m_layers)
1050  {
1051  layerData.push_back (LayerData (layer.numNodes (), itWeight,
1052  layer.activationFunction (),
1053  layer.modeOutputValues ()));
1054  size_t _numWeights = layer.numWeights (numNodesPrev);
1055  itWeight += _numWeights;
1056  numNodesPrev = layer.numNodes ();
1057  }
1058 
1059 
1060  // --------- forward -------------
1061  forwardPattern (m_layers, layerData);
1062 
1063  // ------------- fetch output ------------------
1064  std::vector<double> output;
1065  fetchOutput (layerData.back (), output);
1066  return output;
1067  }
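// Minimal usage sketch (assuming a configured net "net", an input vector of size
// inputSize() and a weight vector of matching size):
//
//   std::vector<double> output = net.compute (inputValues, weights);
//
// "output" holds the values of the output layer, or the probabilities if the last
// layer uses a SIGMOID/SOFTMAX output mode (see fetchOutput below).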
1068 
1069 
1070  template <typename Weights, typename PassThrough>
1071  double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
1072  {
1073  std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1074  assert (numWeights () == weights.size ());
1075  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
1076  return error;
1077  }
1078 
1079  template <typename Weights, typename PassThrough, typename OutContainer>
1080  double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
1081  {
1082  std::vector<double> nothing; // empty gradients; no backpropagation is done, just forward
1083  assert (numWeights () == weights.size ());
1084  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
1085  return error;
1086  }
1087 
1088 
1089  template <typename Weights, typename Gradients, typename PassThrough>
1090  double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
1091  {
1092  std::vector<double> nothing;
1093  assert (numWeights () == weights.size ());
1094  assert (weights.size () == gradients.size ());
1095  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
1096  return error;
1097  }
1098 
1099  template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1100  double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
1101  {
1102  MATH_UNUSED(eFetch);
1103  assert (numWeights () == weights.size ());
1104  assert (weights.size () == gradients.size ());
1105  double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
1106  return error;
1107  }
1108 
1109 
1110 
1111  template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1112  std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
1113  Batch& batch,
1114  const DropContainer& dropContainer,
1115  ItWeight itWeightBegin,
1116  ItWeight /*itWeightEnd*/,
1117  ItGradient itGradientBegin,
1118  ItGradient itGradientEnd,
1119  size_t& totalNumWeights) const
1120  {
1121  LayerData::const_dropout_iterator itDropOut;
1122  bool usesDropOut = !dropContainer.empty ();
1123  if (usesDropOut)
1124  itDropOut = std::begin (dropContainer);
1125 
1126  if (_layers.empty ())
1127  throw std::string ("no layers in this net");
1128 
1129 
1130  // ----------- create layer data -------------------------------------------------------
1131  //LM- This assert not needed anymore (outputsize is actually numNodes+1)
1132  //assert (_layers.back ().numNodes () == outputSize ());
1133  totalNumWeights = 0;
1134  size_t totalNumNodes = 0;
1135  std::vector<std::vector<LayerData>> layerPatternData;
1136  layerPatternData.reserve (_layers.size ()+1);
1137  ItWeight itWeight = itWeightBegin;
1138  ItGradient itGradient = itGradientBegin;
1139  size_t numNodesPrev = inputSize ();
1140  typename Pattern::const_iterator itInputBegin;
1141  typename Pattern::const_iterator itInputEnd;
1142 
1143  // ItWeight itGammaBegin = itWeightBegin + numWeights ();
1144  // ItWeight itBetaBegin = itWeightBegin + numWeights () + numNodes ();
1145  // ItGradient itGradGammaBegin = itGradientBegin + numWeights ();
1146  // ItGradient itGradBetaBegin = itGradientBegin + numWeights () + numNodes ();
1147 
1148 
1149  // --------------------- prepare layer data for input layer ----------------------------
1150  layerPatternData.push_back (std::vector<LayerData>());
1151  for (const Pattern& _pattern : batch)
1152  {
1153  std::vector<LayerData>& layerData = layerPatternData.back ();
1154  layerData.push_back (LayerData (numNodesPrev));
1155 
1156  itInputBegin = _pattern.beginInput ();
1157  itInputEnd = _pattern.endInput ();
1158  layerData.back ().setInput (itInputBegin, itInputEnd);
1159 
1160  if (usesDropOut)
1161  layerData.back ().setDropOut (itDropOut);
1162 
1163  }
1164 
1165 
1166  if (usesDropOut)
1167  itDropOut += _layers.back ().numNodes ();
1168 
1169  // ---------------- prepare subsequent layers ---------------------------------------------
1170  // for each of the layers
1171  for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
1172  {
1173  bool isOutputLayer = (itLayer+1 == itLayerEnd);
1174  bool isFirstHiddenLayer = (itLayer == begin (_layers));
1175 
1176  auto& layer = *itLayer;
1177  layerPatternData.push_back (std::vector<LayerData>());
1178  // for each pattern, prepare a layerData
1179  for (const Pattern& _pattern : batch)
1180  {
1181  std::vector<LayerData>& layerData = layerPatternData.back ();
1182  //layerData.push_back (LayerData (numNodesPrev));
1183 
1184  if (itGradientBegin == itGradientEnd)
1185  {
1186  layerData.push_back (LayerData (layer.numNodes (), itWeight,
1187  layer.activationFunction (),
1188  layer.modeOutputValues ()));
1189  }
1190  else
1191  {
1192  layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
1193  layer.activationFunction (),
1194  layer.inverseActivationFunction (),
1195  layer.modeOutputValues ()));
1196  }
1197 
1198  if (usesDropOut)
1199  {
1200  layerData.back ().setDropOut (itDropOut);
1201  }
1202 
1203  }
1204 
1205  if (usesDropOut)
1206  {
1207  itDropOut += layer.numNodes ();
1208  }
1209  size_t _numWeights = layer.numWeights (numNodesPrev);
1210  totalNumWeights += _numWeights;
1211  itWeight += _numWeights;
1212  itGradient += _numWeights;
1213  numNodesPrev = layer.numNodes ();
1214  totalNumNodes += numNodesPrev;
1215 
1216  }
1217  assert (totalNumWeights > 0);
1218  return layerPatternData;
1219 }
1220 
1221 
1222 
1223  template <typename LayerContainer>
1224  void Net::forwardPattern (const LayerContainer& _layers,
1225  std::vector<LayerData>& layerData) const
1226  {
1227  size_t idxLayer = 0, idxLayerEnd = _layers.size ();
1228  size_t cumulativeNodeCount = 0;
1229  for (; idxLayer < idxLayerEnd; ++idxLayer)
1230  {
1231  LayerData& prevLayerData = layerData.at (idxLayer);
1232  LayerData& currLayerData = layerData.at (idxLayer+1);
1233 
1234  forward (prevLayerData, currLayerData);
1235 
1236  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1237  }
1238  }
1239 
1240 
1241 
1242 
1243  template <typename LayerContainer, typename LayerPatternContainer>
1244  void Net::forwardBatch (const LayerContainer& _layers,
1245  LayerPatternContainer& layerPatternData,
1246  std::vector<double>& valuesMean,
1247  std::vector<double>& valuesStdDev,
1248  size_t trainFromLayer) const
1249  {
1250  valuesMean.clear ();
1251  valuesStdDev.clear ();
1252 
1253  // ---------------------------------- loop over layers and pattern -------------------------------------------------------
1254  size_t cumulativeNodeCount = 0;
1255  for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
1256  {
1257  bool doTraining = idxLayer >= trainFromLayer;
1258 
1259  // get layer-pattern data for this and the corresponding one from the next layer
1260  std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
1261  std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);
1262 
1263  size_t numPattern = prevLayerPatternData.size ();
1264  size_t numNodesLayer = _layers.at (idxLayer).numNodes ();
1265 
1266  std::vector<MeanVariance> means (numNodesLayer);
1267  // ---------------- loop over layerDatas of pattern compute forward ----------------------------
1268  for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1269  {
1270  const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1271  LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1272 
1273 
1274  forward (prevLayerData, currLayerData); // feed forward
1275  }
1276 
1277  // ---------------- loop over layerDatas of pattern apply non-linearities ----------------------------
1278  for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
1279  {
1280  //const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
1281  LayerData& currLayerData = currLayerPatternData.at (idxPattern);
1282 
1283  if (doTraining)
1284  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
1285  currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
1286  else
1287  applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
1288  }
1289 
1290  // accumulate node count
1291  cumulativeNodeCount += numNodesLayer;
1292  }
1293 }
1294 
1295 
1296 
1297 
1298  template <typename OutputContainer>
1299  void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
1300  {
1301  ModeOutputValues eModeOutput = lastLayerData.outputMode ();
1302  if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
1303  {
1304  outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
1305  }
1306  else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||
1307  isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))
1308  {
1309  const auto& prob = lastLayerData.probabilities ();
1310  outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ()) ;
1311  }
1312  else
1313  assert (false);
1314  }
1315 
1316 
1317 
1318 
1319  template <typename OutputContainer>
1320  void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
1321  {
1322  for (const LayerData& lastLayerData : lastLayerPatternData)
1323  fetchOutput (lastLayerData, outputContainer);
1324  }
1325 
1326 
1327 
1328  template <typename ItWeight>
1329  std::tuple</*sumError*/double,/*sumWeights*/double> Net::computeError (const Settings& settings,
1330  std::vector<LayerData>& lastLayerData,
1331  Batch& batch,
1332  ItWeight itWeightBegin,
1333  ItWeight itWeightEnd) const
1334  {
1335  typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();
1336 // typename std::vector<LayerData>::iterator itLayerDataEnd = lastLayerData.end ();
1337 
1338  typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
1339  typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();
1340 
1341  double sumWeights (0.0);
1342  double sumError (0.0);
1343 
1344  size_t idxPattern = 0;
1345 // FIXME: check that iteration doesn't go beyond itLayerDataEnd!
1346  for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
1347  {
1348  ++idxPattern;
1349 
1350  // compute E and the deltas of the computed output and the true output
1351  LayerData& layerData = (*itLayerData);
1352  const Pattern& _pattern = (*itPattern);
1353  double error = errorFunction (layerData, _pattern.output (),
1354  itWeightBegin, itWeightEnd,
1355  _pattern.weight (), settings.factorWeightDecay (),
1356  settings.regularization ());
1357  sumWeights += fabs (_pattern.weight ());
1358  sumError += error;
1359  }
1360  return std::make_tuple (sumError, sumWeights);
1361  }
1362 
1363 
1364 
1365  template <typename Settings>
1366  void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1367  const Settings& settings,
1368  size_t trainFromLayer,
1369  size_t totalNumWeights) const
1370  {
1371  bool doTraining = layerPatternData.size () > trainFromLayer;
1372  if (doTraining) // training
1373  {
1374  // ------------- backpropagation -------------
1375  size_t idxLayer = layerPatternData.size ();
1376  for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
1377  itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
1378  {
1379  --idxLayer;
1380  if (idxLayer <= trainFromLayer) // no training
1381  break;
1382 
1383  std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
1384  std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);
1385 
1386  size_t idxPattern = 0;
1387 // FIXME: check that itPrevLayerData doesn't go beyond itPrevLayerDataEnd!
1388  for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
1389  itPrevLayerData = begin (prevLayerDataColl) /*, itPrevLayerDataEnd = end (prevLayerDataColl)*/;
1390  itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
1391  {
1392  LayerData& currLayerData = (*itCurrLayerData);
1393  LayerData& prevLayerData = *(itPrevLayerData);
1394 
1395  backward (prevLayerData, currLayerData);
1396 
1397  // the factorWeightDecay has to be scaled by 1/n where n is the number of weights (synapses)
1398  // because of the L1 and L2 regularization terms
1399  //
1400  // http://neuralnetworksanddeeplearning.com/chap3.html#overfitting_and_regularization
1401  //
1402  // L1 : -factorWeightDecay*sgn(w)/numWeights
1403  // L2 : -factorWeightDecay/numWeights
1404  update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
1405  }
1406  }
1407  }
1408  }
1409 
1410 
1411 
1412 /*! \brief forward propagation and backward propagation
1413  *
1414  *
1415  */
1416  template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1417  double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
1418  ItWeight itWeightBegin, ItWeight itWeightEnd,
1419  ItGradient itGradientBegin, ItGradient itGradientEnd,
1420  size_t trainFromLayer,
1421  OutContainer& outputContainer, bool doFetchOutput) const
1422  {
1423  Settings& settings = std::get<0>(settingsAndBatch);
1424  Batch& batch = std::get<1>(settingsAndBatch);
1425  DropContainer& dropContainer = std::get<2>(settingsAndBatch);
1426 
1427  double sumError = 0.0;
1428  double sumWeights = 0.0; // -------------
1429 
1430 
1431  // ----------------------------- prepare layer data -------------------------------------
1432  size_t totalNumWeights (0);
1433  std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
1434  batch,
1435  dropContainer,
1436  itWeightBegin,
1437  itWeightEnd,
1438  itGradientBegin,
1439  itGradientEnd,
1440  totalNumWeights);
1441 
1442 
1443 
1444  // ---------------------------------- propagate forward ------------------------------------------------------------------
1445  std::vector<double> valuesMean;
1446  std::vector<double> valuesStdDev;
1447  forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);
1448 
1449 
1450  // ------------- fetch output ------------------
1451  if (doFetchOutput)
1452  {
1453  fetchOutput (layerPatternData.back (), outputContainer);
1454  }
1455 
1456 
1457  // ------------- error computation -------------
1458  std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);
1459 
1460 
1461  // ------------- backpropagation -------------
1462  backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);
1463 
1464 
1465  // --- compile the measures
1466  double batchSize = std::distance (std::begin (batch), std::end (batch));
1467  for (auto it = itGradientBegin; it != itGradientEnd; ++it)
1468  (*it) /= batchSize;
1469 
1470 
1471  sumError /= sumWeights;
1472  return sumError;
1473  }
1474 
1475 
1476 
1477 /*! \brief initialization of the weights
1478  *
1479  *
1480  */
1481  template <typename OutIterator>
1482  void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
1483  {
1484  if (eInitStrategy == WeightInitializationStrategy::XAVIER)
1485  {
1486  // input and output properties
1487  int numInput = inputSize ();
1488 
1489  // compute variance and mean of input and output
1490  //...
1491 
1492 
1493  // compute the weights
1494  for (auto& layer: layers ())
1495  {
1496  double nIn = numInput;
1497  double stdDev = sqrt (2.0/nIn);
1498  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1499  {
1500  (*itWeight) = DNN::gaussDouble (0.0, stdDev); // factor 2.0 for ReLU
1501  ++itWeight;
1502  }
1503  numInput = layer.numNodes ();
1504  }
1505  return;
1506  }
1507 
1508  if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
1509  {
1510  // input and output properties
1511  int numInput = inputSize ();
1512 
1513  // compute variance and mean of input and output
1514  //...
1515 
1516 
1517  // compute the weights
1518  for (auto& layer: layers ())
1519  {
1520  double nIn = numInput;
1521  double minVal = -sqrt(2.0/nIn);
1522  double maxVal = sqrt (2.0/nIn);
1523  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1524  {
1525 
1526  (*itWeight) = DNN::uniformDouble (minVal, maxVal); // factor 2.0 for ReLU
1527  ++itWeight;
1528  }
1529  numInput = layer.numNodes ();
1530  }
1531  return;
1532  }
1533 
1534  if (eInitStrategy == WeightInitializationStrategy::TEST)
1535  {
1536  // input and output properties
1537  int numInput = inputSize ();
1538 
1539  // compute variance and mean of input and output
1540  //...
1541 
1542 
1543  // compute the weights
1544  for (auto& layer: layers ())
1545  {
1546 // double nIn = numInput;
1547  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1548  {
1549  (*itWeight) = DNN::gaussDouble (0.0, 0.1);
1550  ++itWeight;
1551  }
1552  numInput = layer.numNodes ();
1553  }
1554  return;
1555  }
1556 
1557  if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
1558  {
1559  // input and output properties
1560  int numInput = inputSize ();
1561 
1562  // compute variance and mean of input and output
1563  //...
1564 
1565 
1566  // compute the weights
1567  for (auto& layer: layers ())
1568  {
1569  double nIn = numInput;
1570  for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
1571  {
1572  (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn))); // factor 2.0 for ReLU
1573  ++itWeight;
1574  }
1575  numInput = layer.numNodes ();
1576  }
1577  return;
1578  }
1579 
1580  }
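// Minimal usage sketch (the same pattern appears in the commented-out preTrain code below):
//
//   std::vector<double> weights;
//   net.initializeWeights (WeightInitializationStrategy::XAVIERUNIFORM,
//                          std::back_inserter (weights));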
1581 
1582 
1583 
1584 
1585 
1586 /*! \brief compute the error function
1587  *
1588  *
1589  */
1590  template <typename Container, typename ItWeight>
1591  double Net::errorFunction (LayerData& layerData,
1592  Container truth,
1593  ItWeight itWeight,
1594  ItWeight itWeightEnd,
1595  double patternWeight,
1596  double factorWeightDecay,
1597  EnumRegularization eRegularization) const
1598  {
1599  double error (0);
1600  switch (m_eErrorFunction)
1601  {
1602  case ModeErrorFunction::SUMOFSQUARES:
1603  {
1604  error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
1605  layerData.deltasBegin (), layerData.deltasEnd (),
1606  layerData.inverseActivationFunction (),
1607  patternWeight);
1608  break;
1609  }
1610  case ModeErrorFunction::CROSSENTROPY:
1611  {
1612  assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
1613  std::vector<double> probabilities = layerData.probabilities ();
1614  error = crossEntropy (begin (probabilities), end (probabilities),
1615  begin (truth), end (truth),
1616  layerData.deltasBegin (), layerData.deltasEnd (),
1617  layerData.inverseActivationFunction (),
1618  patternWeight);
1619  break;
1620  }
1621  case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
1622  {
1623  std::cout << "softmax." << std::endl;
1624  assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
1625  std::vector<double> probabilities = layerData.probabilities ();
1626  error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
1627  begin (truth), end (truth),
1628  layerData.deltasBegin (), layerData.deltasEnd (),
1629  layerData.inverseActivationFunction (),
1630  patternWeight);
1631  break;
1632  }
1633  }
1634  if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
1635  {
1636  error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
1637  }
1638  return error;
1639  }
1640 
1641 
1642 
1643 
1644 
1645 
1646 
1647 // /*! \brief pre-training
1648 // *
1649 // * in development
1650 // */
1651 // template <typename Minimizer>
1652 // void Net::preTrain (std::vector<double>& weights,
1653 // std::vector<Pattern>& trainPattern,
1654 // const std::vector<Pattern>& testPattern,
1655 // Minimizer& minimizer, Settings& settings)
1656 // {
1657 // auto itWeightGeneral = std::begin (weights);
1658 // std::vector<Pattern> prePatternTrain (trainPattern.size ());
1659 // std::vector<Pattern> prePatternTest (testPattern.size ());
1660 
1661 // size_t _inputSize = inputSize ();
1662 
1663 // // transform pattern using the created preNet
1664 // auto initializePrePattern = [&](const std::vector<Pattern>& pttrnInput, std::vector<Pattern>& pttrnOutput)
1665 // {
1666 // pttrnOutput.clear ();
1667 // std::transform (std::begin (pttrnInput), std::end (pttrnInput),
1668 // std::back_inserter (pttrnOutput),
1669 // [](const Pattern& p)
1670 // {
1671 // Pattern pat (p.input (), p.input (), p.weight ());
1672 // return pat;
1673 // });
1674 // };
1675 
1676 // initializePrePattern (trainPattern, prePatternTrain);
1677 // initializePrePattern (testPattern, prePatternTest);
1678 
1679 // std::vector<double> originalDropFractions = settings.dropFractions ();
1680 
1681 // for (auto& _layer : layers ())
1682 // {
1683 // // compute number of weights (as a function of the number of incoming nodes)
1684 // // fetch number of nodes
1685 // size_t numNodes = _layer.numNodes ();
1686 // size_t _numWeights = _layer.numWeights (_inputSize);
1687 
1688 // // ------------------
1689 // DNN::Net preNet;
1690 // if (!originalDropFractions.empty ())
1691 // {
1692 // originalDropFractions.erase (originalDropFractions.begin ());
1693 // settings.setDropOut (originalDropFractions.begin (), originalDropFractions.end (), settings.dropRepetitions ());
1694 // }
1695 // std::vector<double> preWeights;
1696 
1697 // // define the preNet (pretraining-net) for this layer
1698 // // outputSize == inputSize, because this is an autoencoder;
1699 // preNet.setInputSize (_inputSize);
1700 // preNet.addLayer (DNN::Layer (numNodes, _layer.activationFunctionType ()));
1701 // preNet.addLayer (DNN::Layer (_inputSize, DNN::EnumFunction::LINEAR, DNN::ModeOutputValues::DIRECT));
1702 // preNet.setErrorFunction (DNN::ModeErrorFunction::SUMOFSQUARES);
1703 // preNet.setOutputSize (_inputSize); // outputSize is the inputSize (autoencoder)
1704 
1705 // // initialize weights
1706 // preNet.initializeWeights (DNN::WeightInitializationStrategy::XAVIERUNIFORM,
1707 // std::back_inserter (preWeights));
1708 
1709 // // overwrite already existing weights from the "general" weights
1710 // std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ());
1711 // std::copy (itWeightGeneral, itWeightGeneral+_numWeights, preWeights.begin ()+_numWeights); // set identical weights for the temporary output layer
1712 
1713 
1714 // // train the "preNet"
1715 // preNet.train (preWeights, prePatternTrain, prePatternTest, minimizer, settings);
1716 
1717 // // fetch the pre-trained weights (without the output part of the autoencoder)
1718 // std::copy (std::begin (preWeights), std::begin (preWeights) + _numWeights, itWeightGeneral);
1719 
1720 // // advance the iterator on the incoming weights
1721 // itWeightGeneral += _numWeights;
1722 
1723 // // remove the weights of the output layer of the preNet
1724 // preWeights.erase (preWeights.begin () + _numWeights, preWeights.end ());
1725 
1726 // // remove the outputLayer of the preNet
1727 // preNet.removeLayer ();
1728 
1729 // // set the output size to the number of nodes in the new output layer (== last hidden layer)
1730 // preNet.setOutputSize (numNodes);
1731 
1732 // // transform pattern using the created preNet
1733 // auto proceedPattern = [&](std::vector<Pattern>& pttrn)
1734 // {
1735 // std::vector<Pattern> newPttrn;
1736 // std::for_each (std::begin (pttrn), std::end (pttrn),
1737 // [&preNet,&preWeights,&newPttrn](Pattern& p)
1738 // {
1739 // std::vector<double> output = preNet.compute (p.input (), preWeights);
1740 // Pattern pat (output, output, p.weight ());
1741 // newPttrn.push_back (pat);
1742 // // p = pat;
1743 // });
1744 // return newPttrn;
1745 // };
1746 
1747 
1748 // prePatternTrain = proceedPattern (prePatternTrain);
1749 // prePatternTest = proceedPattern (prePatternTest);
1750 
1751 
1752 // // the new input size is the output size of the already reduced preNet
1753 // _inputSize = preNet.layers ().back ().numNodes ();
1754 // }
1755 // }
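// In short, the disabled pre-training sketch above proceeds layer by layer: for each layer it
// builds an autoencoder "preNet" whose output size equals its input size, trains it on patterns
// whose target is their own input, copies the learned first-layer weights back into the general
// weight vector, strips the autoencoder's output layer, and finally pushes the training and test
// patterns through the reduced preNet so the next layer is pre-trained on the transformed data.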
1756 
1757 
1758 
1759 
1760 
1761 
1762 
1763 
1764 
1765 
1766 
1767 
1768 
1769 
1770 
1771 
1772  } // namespace DNN
1773 } // namespace TMVA
1774 
1775 #endif