Logo ROOT   master
Reference Guide
NeuralNet.h
Go to the documentation of this file.
1 /**
2  * @file NeuralNet
3  * @author Peter Speckmayer
4  * @version 1.0
5  *
6  * @section LICENSE
7  *
8  *
9  * @section Neural net implementation
10  *
11  * An implementation of a neural net for TMVA. This neural net uses multithreading
12  *
13  */
14 
15 
16 //////////////////////////////////////////////////////////////////////////
17 // //
18 // NeuralNet //
19 // //
20 // A neural net implementation //
21 // //
22 //////////////////////////////////////////////////////////////////////////
23 
24 #ifndef TMVA_NEURAL_NET
25 #define TMVA_NEURAL_NET
26 #pragma once
27 
28 #include <vector>
29 #include <iostream>
30 #include <fstream>
31 #include <algorithm>
32 #include <iterator>
33 #include <functional>
34 #include <tuple>
35 #include <cmath>
36 #include <cassert>
37 #include <random>
38 #include <thread>
39 #include <future>
40 #include <type_traits>
41 
42 #include "Pattern.h"
43 #include "Monitoring.h"
44 
45 #include "TApplication.h"
46 #include "Timer.h"
47 
48 #include "TH1F.h"
49 #include "TH2F.h"
50 
51 #include <fenv.h> // turn on or off exceptions for NaN and other numeric exceptions
52 
53 
54 namespace TMVA
55 {
56 
57  class IPythonInteractive;
58 
59  namespace DNN
60  {
61 
 62  // double gaussDouble (double mean, double sigma);
63 
64 
65 
// Random-number helpers (defined in the implementation file).
double gaussDouble (double mean, double sigma);        ///< random value, presumably drawn from a Gaussian with the given mean/sigma — defined elsewhere
double uniformDouble (double minValue, double maxValue); ///< random value, presumably uniform between minValue and maxValue — defined elsewhere
int randomInt (int maxValue);                          ///< random integer bounded by maxValue (exact range defined in the implementation file)
69 
70 
71 
72 
74  {
75  public:
77  : m_n(0)
78  , m_sumWeights(0)
79  , m_mean(0)
80  , m_squared(0)
81  {}
82 
83  inline void clear()
84  {
85  m_n = 0;
86  m_sumWeights = 0;
87  m_mean = 0;
88  m_squared = 0;
89  }
90 
91  template <typename T>
92  inline void add(T value, double weight = 1.0)
93  {
94  ++m_n; // a value has been added
95 
96  if (m_n == 1) // initialization
97  {
98  m_mean = value;
99  m_squared = 0.0;
100  m_sumWeights = weight;
101  return;
102  }
103 
104  double tmpWeight = m_sumWeights+weight;
105  double Q = value - m_mean;
106 
107  double R = Q*weight/tmpWeight;
108  m_mean += R;
110 
111  m_sumWeights = tmpWeight;
112  }
113 
114  template <typename ITERATOR>
115  inline void add (ITERATOR itBegin, ITERATOR itEnd)
116  {
117  for (ITERATOR it = itBegin; it != itEnd; ++it)
118  add (*it);
119  }
120 
121 
122 
123  inline int count() const { return m_n; }
124  inline double weights() const { if(m_n==0) return 0; return m_sumWeights; }
125  inline double mean() const { if(m_n==0) return 0; return m_mean; }
126  inline double var() const
127  {
128  if(m_n==0)
129  return 0;
130  if (m_squared <= 0)
131  return 0;
132  return (m_squared/m_sumWeights);
133  }
134 
135  inline double var_corr () const
136  {
137  if (m_n <= 1)
138  return var ();
139 
140  return (var()*m_n/(m_n-1)); // unbiased for small sample sizes
141  }
142 
143  inline double stdDev_corr () const { return sqrt( var_corr() ); }
144  inline double stdDev () const { return sqrt( var() ); } // unbiased for small sample sizes
145 
146  private:
147  size_t m_n;
148  double m_sumWeights;
149  double m_mean;
150  double m_squared;
151  };
152 
153 
154 
/*! \brief activation functions selectable for a layer
 *
 * Each enumerator's underlying value is the character tag used for the choice
 * (e.g. when writing/reading a net configuration).
 */
enum class EnumFunction
{
   ZERO = '0',            ///< zero function
   LINEAR = 'L',          ///< linear (identity) activation
   TANH = 'T',            ///< hyperbolic tangent
   RELU = 'R',            ///< rectified linear unit
   SYMMRELU = 'r',        ///< symmetric ReLU variant
   TANHSHIFT = 't',       ///< shifted tanh variant
   SIGMOID = 's',         ///< sigmoid
   SOFTSIGN = 'S',        ///< soft-sign
   GAUSS = 'G',           ///< Gaussian
   GAUSSCOMPLEMENT = 'C'  ///< complement of the Gaussian
};
168 
169 
170 
172  {
173  NONE, L1, L2, L1MAX
174  };
175 
176 
/*! \brief transformation applied to the output-layer node values
 *
 * The enumerators are distinct bit flags, so modes can be combined
 * (see the isFlagSet helper below).
 */
enum class ModeOutputValues : int
{
   DIRECT = 0x01,             ///< return the output values unchanged
   SIGMOID = 0x02,            ///< apply a sigmoid to each output value (per-output probability)
   SOFTMAX = 0x04,            ///< apply a softmax across all output values (mutually exclusive probabilities)
   BATCHNORMALIZATION = 0x08  ///< batch normalization
};
184 
185 
186 
188  {
190  }
191 
193  {
195  return lhs;
196  }
197 
199  {
201  }
202 
204  {
206  return lhs;
207  }
208 
209 
/*! \brief check whether a bit flag is set in a combined flag value
 *
 * \param flag  the flag (bit mask) to test for
 * \param value the combined value to inspect
 * \return true if any bit of \p flag is set in \p value
 */
template <typename T>
bool isFlagSet (T flag, T value)
{
   // static_cast instead of a C-style cast; requires operator& for T
   return static_cast<int> (value & flag) != 0;
}
215 
216 
217 
218  class Net;
219 
220 
221 
222 
223 
224 
225 
226  typedef std::vector<char> DropContainer;
227 
228 
229  /*! \brief The Batch class encapsulates one mini-batch
230  *
231  * Holds a const_iterator to the beginning and the end of one batch in a vector of Pattern
232  */
233  class Batch
234  {
235  public:
236  typedef typename std::vector<Pattern>::const_iterator const_iterator;
237 
238  Batch (typename std::vector<Pattern>::const_iterator itBegin, typename std::vector<Pattern>::const_iterator itEnd)
239  : m_itBegin (itBegin)
240  , m_itEnd (itEnd)
241  {}
242 
243  const_iterator begin () const { return m_itBegin; }
244  const_iterator end () const { return m_itEnd; }
245 
246  size_t size () const { return std::distance (begin (), end ()); }
247 
248  private:
249  const_iterator m_itBegin; ///< iterator denoting the beginning of the batch
250  const_iterator m_itEnd; ///< iterator denoting the end of the batch
251  };
252 
253 
254 
255 
256 
257 
/*! \brief apply the weights to propagate values from a source layer to a target layer
 *
 * \param itSourceBegin/itSourceEnd range of source-layer node values
 * \param itWeight start of the weights connecting the two layers
 * \param itTargetBegin/itTargetEnd range of target-layer node values written by this function
 * (template defined in the implementation file)
 */
template <typename ItSource, typename ItWeight, typename ItTarget>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd);

/*! \brief apply the weights in the backward direction (current layer back to previous layer)
 *
 * \param itCurrBegin/itCurrEnd range on the current layer
 * \param itWeight start of the weights connecting the two layers
 * \param itPrevBegin/itPrevEnd range on the previous layer written by this function
 * (template defined in the implementation file)
 */
template <typename ItSource, typename ItWeight, typename ItPrev>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd);
265 
266 
267 
268 
269 
/*! \brief apply the activation functions to a range of node values (in place)
 * (template defined in the implementation file)
 */
template <typename ItValue, typename ItFunction>
void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction);

/*! \brief apply the activation functions and, via the inverse functions, fill the gradients
 * (template defined in the implementation file)
 */
template <typename ItValue, typename ItFunction, typename ItInverseFunction, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, ItGradient itGradient);
276 
277 
278 
279  template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
280  void update (ItSource itSource, ItSource itSourceEnd,
281  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
282  ItTargetGradient itTargetGradientBegin,
283  ItGradient itGradient);
284 
285 
286 
287  template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
288  void update (ItSource itSource, ItSource itSourceEnd,
289  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
290  ItTargetGradient itTargetGradientBegin,
291  ItGradient itGradient,
292  ItWeight itWeight, double weightDecay);
293 
294 
295 
296  // ----- signature of a minimizer -------------
297  // class Minimizer
298  // {
299  // public:
300 
301  // template <typename Function, typename Variables, typename PassThrough>
302  // double operator() (Function& fnc, Variables& vars, PassThrough& passThrough)
303  // {
304  // // auto itVars = begin (vars);
305  // // auto itVarsEnd = end (vars);
306 
307  // std::vector<double> myweights;
308  // std::vector<double> gradients;
309 
310  // double value = fnc (passThrough, myweights);
311  // value = fnc (passThrough, myweights, gradients);
312  // return value;
313  // }
314  // };
315 
316 
317 
318  ///< list all the minimizer types
320  {
321  fSteepest ///< SGD
322  };
323 
324 
325 
326 
327 
/*! \brief Steepest Gradient Descent algorithm (SGD)
 *
 * Implements a steepest gradient descent minimization algorithm
 */
class Steepest
{
public:

   size_t m_repetitions; ///< gradients are re-computed every "m_repetitions" steps

   /*! \brief c'tor
    *
    * C'tor
    *
    * \param learningRate denotes the learning rate for the SGD algorithm
    * \param momentum fraction of the velocity which is taken over from the last step
    * \param repetitions re-compute the gradients each "repetitions" steps
    */
   Steepest (double learningRate = 1e-4,
             double momentum = 0.5,
             size_t repetitions = 10)
      : m_repetitions (repetitions)
      , m_alpha (learningRate)
      , m_beta (momentum)
   {}

   /*! \brief operator to call the steepest gradient descent algorithm
    *
    * entry point to start the minimization procedure
    *
    * \param fitnessFunction (templated) function which has to be provided. This function is minimized
    * \param weights (templated) a reference to a container of weights. The result of the minimization procedure
    *                is returned via this reference (needs to support std::begin and std::end)
    * \param passThrough (templated) object which can hold any data which the fitness function needs. This object
    *                is not touched by the minimizer; This object is provided to the fitness function when
    *                called
    * (template defined in the implementation file)
    */
   template <typename Function, typename Weights, typename PassThrough>
   double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough);


   double m_alpha; ///< internal parameter (learningRate)
   double m_beta;  ///< internal parameter (momentum)
   std::vector<double> m_prevGradients; ///< vector remembers the gradients of the previous step

   std::vector<double> m_localWeights;   ///< local weights for reuse in thread.
   std::vector<double> m_localGradients; ///< local gradients for reuse in thread.
};
377 
378 
379 
380 
381 
382 
383 
384 
385 
386 
387 
388 
389 
390 
391 
392 
393 
394 
395  template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
396  double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
397 
398 
399 
400  template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
401  double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
402 
403 
404 
405 
406  template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
407  double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
408 
409 
410 
411 
412 
413  template <typename ItWeight>
414  double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization);
415 
416 
417 
418 
419 
420 
421 
422 
423 
424 
425 
426 
427 
428 
429  /*! \brief LayerData holds the data of one layer
430  *
431  * LayerData holds the data of one layer, but not its layout
432  *
433  *
434  */
435  class LayerData
436  {
437  public:
438  typedef std::vector<double> container_type;
439 
440  typedef container_type::iterator iterator_type;
441  typedef container_type::const_iterator const_iterator_type;
442 
443  typedef std::vector<std::function<double(double)> > function_container_type;
444  typedef function_container_type::iterator function_iterator_type;
445  typedef function_container_type::const_iterator const_function_iterator_type;
446 
447  typedef DropContainer::const_iterator const_dropout_iterator;
448 
449  /*! \brief c'tor of LayerData
450  *
451  * C'tor of LayerData for the input layer
452  *
453  * \param itInputBegin iterator to the begin of a vector which holds the values of the nodes of the neural net
 454  * \param itInputEnd iterator to the end of a vector which holds the values of the nodes of the neural net
 455  * \param eModeOutput indicates a potential transformation of the output values before further computation
 456  * DIRECT does no further transformation; SIGMOID applies a sigmoid transformation to each
 457  * output value (to create a probability); SOFTMAX applies a softmax transformation to all
 458  * output values (mutually exclusive probability)
459  */
461 
462 
463  /*! \brief c'tor of LayerData
464  *
465  * C'tor of LayerData for the input layer
466  *
467  * \param inputSize input size of this layer
468  */
469  LayerData (size_t inputSize);
471 
472 
473  /*! \brief c'tor of LayerData
474  *
475  * C'tor of LayerData for all layers which are not the input layer; Used during the training of the DNN
476  *
477  * \param size size of the layer
478  * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
479  * \param itGradientBegin indicates the start of the gradients for this layer on the gradient vector
480  * \param itFunctionBegin indicates the start of the vector of activation functions for this layer on the
481  * activation function vector
482  * \param itInverseFunctionBegin indicates the start of the vector of activation functions for this
483  * layer on the activation function vector
 484  * \param eModeOutput indicates a potential transformation of the output values before further computation
 485  * DIRECT does no further transformation; SIGMOID applies a sigmoid transformation to each
486  * output value (to create a probability); SOFTMAX applies a softmax transformation to all
487  * output values (mutually exclusive probability)
488  */
489  LayerData (size_t size,
490  const_iterator_type itWeightBegin,
491  iterator_type itGradientBegin,
492  std::shared_ptr<std::function<double(double)>> activationFunction,
493  std::shared_ptr<std::function<double(double)>> inverseActivationFunction,
495 
496  /*! \brief c'tor of LayerData
497  *
498  * C'tor of LayerData for all layers which are not the input layer; Used during the application of the DNN
499  *
500  * \param size size of the layer
501  * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
502  * \param itFunctionBegin indicates the start of the vector of activation functions for this layer on the
503  * activation function vector
 504  * \param eModeOutput indicates a potential transformation of the output values before further computation
 505  * DIRECT does no further transformation; SIGMOID applies a sigmoid transformation to each
506  * output value (to create a probability); SOFTMAX applies a softmax transformation to all
507  * output values (mutually exclusive probability)
508  */
509  LayerData (size_t size, const_iterator_type itWeightBegin,
510  std::shared_ptr<std::function<double(double)>> activationFunction,
512 
513  /*! \brief copy c'tor of LayerData
514  *
515  *
516  */
517  LayerData (const LayerData& other)
518  : m_size (other.m_size)
520  , m_itInputEnd (other.m_itInputEnd)
521  , m_deltas (other.m_deltas)
523  , m_values (other.m_values)
524  , m_itDropOut (other.m_itDropOut)
525  , m_hasDropOut (other.m_hasDropOut)
531  , m_hasWeights (other.m_hasWeights)
533  , m_eModeOutput (other.m_eModeOutput)
534  {}
535 
536  /*! \brief move c'tor of LayerData
537  *
538  *
539  */
541  : m_size (other.m_size)
543  , m_itInputEnd (other.m_itInputEnd)
544  , m_deltas (std::move(other.m_deltas))
545  , m_valueGradients (std::move(other.m_valueGradients))
546  , m_values (std::move(other.m_values))
547  , m_itDropOut (other.m_itDropOut)
548  , m_hasDropOut (other.m_hasDropOut)
554  , m_hasWeights (other.m_hasWeights)
556  , m_eModeOutput (other.m_eModeOutput)
557  {}
558 
559 
560  /*! \brief change the input iterators
561  *
562  *
563  * \param itInputBegin indicates the start of the input node vector
564  * \param itInputEnd indicates the end of the input node vector
565  *
566  */
567  void setInput (const_iterator_type itInputBegin, const_iterator_type itInputEnd)
568  {
569  m_isInputLayer = true;
570  m_itInputBegin = itInputBegin;
571  m_itInputEnd = itInputEnd;
572  }
573 
574  /*! \brief clear the values and the deltas
575  *
576  *
577  */
578  void clear ()
579  {
580  m_values.assign (m_values.size (), 0.0);
581  m_deltas.assign (m_deltas.size (), 0.0);
582  }
583 
584  const_iterator_type valuesBegin () const { return m_isInputLayer ? m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values
585  const_iterator_type valuesEnd () const { return m_isInputLayer ? m_itInputEnd : end (m_values); } ///< returns iterator to the end of the (node) values
586 
587  iterator_type valuesBegin () { assert (!m_isInputLayer); return begin (m_values); } ///< returns iterator to the begin of the (node) values
588  iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); } ///< returns iterator to the end of the (node) values
589 
590  ModeOutputValues outputMode () const { return m_eModeOutput; } ///< returns the output mode
591  container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them
592 
593  iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation)
594  iterator_type deltasEnd () { return end (m_deltas); } ///< returns iterator to the end of the deltas (back-propagation)
595 
596  const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation)
597  const_iterator_type deltasEnd () const { return end (m_deltas); } ///< returns const iterator to the end of the deltas (back-propagation)
598 
599  iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values
600  iterator_type valueGradientsEnd () { return end (m_valueGradients); } ///< returns iterator to the end of the gradients of the node values
601 
602  const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients
603  const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); } ///< returns const iterator to the end of the gradients
604 
605  iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients
606  const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients
607  const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer
608 
609  std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }
610  std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; }
611 
612  /*! \brief set the drop-out info for this layer
613  *
614  */
615  template <typename Iterator>
616  void setDropOut (Iterator itDrop) { m_itDropOut = itDrop; m_hasDropOut = true; }
617 
618  /*! \brief clear the drop-out-data for this layer
619  *
620  *
621  */
622  void clearDropOut () { m_hasDropOut = false; }
623 
624  bool hasDropOut () const { return m_hasDropOut; } ///< has this layer drop-out turned on?
625  const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; } ///< return the begin of the drop-out information
626 
627  size_t size () const { return m_size; } ///< return the size of the layer
628 
629  private:
630 
631  /*! \brief compute the probabilities from the node values
632  *
633  *
634  */
636 
637  private:
638 
639  size_t m_size; ////< layer size
640 
641  const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector
642  const_iterator_type m_itInputEnd; ///< iterator to the end of the nodes in the input node vector
643 
644  std::vector<double> m_deltas; ///< stores the deltas for the DNN training
645  std::vector<double> m_valueGradients; ///< stores the gradients of the values (nodes)
646  std::vector<double> m_values; ///< stores the values of the nodes in this layer
647  const_dropout_iterator m_itDropOut; ///< iterator to a container indicating if the corresponding node is to be dropped
648  bool m_hasDropOut; ///< dropOut is turned on?
649 
650  const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector
651  iterator_type m_itGradientBegin; ///< iterator to the first gradient of this layer in the gradient vector
652 
653  std::shared_ptr<std::function<double(double)>> m_activationFunction; ///< activation function for this layer
654  std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< inverse activation function for this layer
655 
656  bool m_isInputLayer; ///< is this layer an input layer
657  bool m_hasWeights; ///< does this layer have weights (it does not if it is the input layer)
658  bool m_hasGradients; ///< does this layer have gradients (only if in training mode)
659 
660  ModeOutputValues m_eModeOutput; ///< stores the output mode (DIRECT, SIGMOID, SOFTMAX)
661 
662  };
663 
664 
665 
666 
667 
668  /*! \brief Layer defines the layout of a layer
669  *
670  * Layer defines the layout of a specific layer in the DNN
671  * Objects of this class don't hold the layer data itself (see class "LayerData")
672  *
673  */
674  class Layer
675  {
676  public:
677 
678  /*! \brief c'tor for defining a Layer
679  *
680  *
681  * \param itInputBegin indicates the start of the input node vector
682  * \param itInputEnd indicates the end of the input node vector
683  *
684  */
686 
687  ModeOutputValues modeOutputValues () const { return m_eModeOutputValues; } ///< get the mode-output-value (direct, probabilities)
688  void modeOutputValues (ModeOutputValues eModeOutputValues) { m_eModeOutputValues = eModeOutputValues; } ///< set the mode-output-value
689 
690  size_t numNodes () const { return m_numNodes; } ///< return the number of nodes of this layer
691  size_t numWeights (size_t numInputNodes) const { return numInputNodes * numNodes (); } ///< return the number of weights for this layer (fully connected)
692 
693  std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; } ///< fetch the activation function for this layer
694  std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; } ///< fetch the inverse activation function for this layer
695 
696  EnumFunction activationFunctionType () const { return m_activationFunctionType; } ///< get the activation function type for this layer
697 
698  private:
699 
700 
701  std::shared_ptr<std::function<double(double)>> m_activationFunction; ///< stores the activation function
702  std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< stores the inverse activation function
703 
704 
705  size_t m_numNodes;
706 
707  ModeOutputValues m_eModeOutputValues; ///< do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned as such
709 
710  friend class Net;
711  };
712 
713 
714 
715 
716 
717  template <typename LAYERDATA>
718  void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData);
719 
720 
721  template <typename LAYERDATA>
722  void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData);
723 
724 
725  template <typename LAYERDATA>
726  void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization);
727 
728 
729 
730  /*! \brief Settings for the training of the neural net
731  *
732  *
733  */
734  class Settings
735  {
736  public:
737 
738  /*! \brief c'tor
739  *
740  *
741  */
743  size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
744  double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE,
745  MinimizerType _eMinimizerType = MinimizerType::fSteepest,
746  double _learningRate = 1e-5, double _momentum = 0.3,
747  int _repetitions = 3,
748  bool _multithreading = true);
749 
750  /*! \brief d'tor
751  *
752  *
753  */
754  virtual ~Settings ();
755 
756 
757  /*! \brief set the drop-out configuration (layer-wise)
758  *
759  * \param begin begin of an array or vector denoting the drop-out probabilities for each layer
760  * \param end end of an array or vector denoting the drop-out probabilities for each layer
761  * \param _dropRepetitions denotes after how many repetitions the drop-out setting (which nodes are dropped out exactly) is changed
762  */
763  template <typename Iterator>
764  void setDropOut (Iterator begin, Iterator end, size_t _dropRepetitions) { m_dropOut.assign (begin, end); m_dropRepetitions = _dropRepetitions; }
765 
766  size_t dropRepetitions () const { return m_dropRepetitions; }
767  const std::vector<double>& dropFractions () const { return m_dropOut; }
768 
769  void setMonitoring (std::shared_ptr<Monitoring> ptrMonitoring) { fMonitoring = ptrMonitoring; } ///< prepared for monitoring
770 
771  size_t convergenceSteps () const { return m_convergenceSteps; } ///< how many steps until training is deemed to have converged
772  size_t batchSize () const { return m_batchSize; } ///< mini-batch size
773  size_t testRepetitions () const { return m_testRepetitions; } ///< how often is the test data tested
774  double factorWeightDecay () const { return m_factorWeightDecay; } ///< get the weight-decay factor
775 
776  double learningRate () const { return fLearningRate; } ///< get the learning rate
777  double momentum () const { return fMomentum; } ///< get the momentum (e.g. for SGD)
778  int repetitions () const { return fRepetitions; } ///< how many steps have to be gone until the batch is changed
779  MinimizerType minimizerType () const { return fMinimizerType; } ///< which minimizer shall be used (e.g. SGD)
780 
781 
782 
783 
784 
785 
786  virtual void testSample (double /*error*/, double /*output*/, double /*target*/, double /*weight*/) {} ///< virtual function to be used for monitoring (callback)
787  virtual void startTrainCycle () ///< callback for monitoring and logging
788  {
789  m_convergenceCount = 0;
791  m_minError = 1e10;
792  }
793  virtual void endTrainCycle (double /*error*/) {} ///< callback for monitoring and logging
794 
795  virtual void setProgressLimits (double minProgress = 0, double maxProgress = 100) ///< for monitoring and logging (set the current "progress" limits for the display of the progress)
796  {
797  m_minProgress = minProgress;
798  m_maxProgress = maxProgress;
799  }
800  virtual void startTraining () ///< start drawing the progress bar
801  {
803  }
804  virtual void cycle (double progress, TString text) ///< advance on the progress bar
805  {
807  }
808 
809  virtual void startTestCycle () {} ///< callback for monitoring and loggging
810  virtual void endTestCycle () {} ///< callback for monitoring and loggging
811  virtual void testIteration () {} ///< callback for monitoring and loggging
812  virtual void drawSample (const std::vector<double>& /*input*/, const std::vector<double>& /* output */, const std::vector<double>& /* target */, double /* patternWeight */) {} ///< callback for monitoring and loggging
813 
814  virtual void computeResult (const Net& /* net */, std::vector<double>& /* weights */) {} ///< callback for monitoring and loggging
815 
816  virtual bool hasConverged (double testError); ///< has this training converged already?
817 
818  EnumRegularization regularization () const { return m_regularization; } ///< some regularization of the DNN is turned on?
819 
820  bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on?
821 
822 
823  void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring
824  void create (std::string histoName, int bins, double min, double max) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max); } ///< for monitoring
825  void create (std::string histoName, int bins, double min, double max, int bins2, double min2, double max2) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max, bins2, min2, max2); } ///< for monitoring
826  void addPoint (std::string histoName, double x) { if (fMonitoring) fMonitoring->addPoint (histoName, x); } ///< for monitoring
827  void addPoint (std::string histoName, double x, double y) {if (fMonitoring) fMonitoring->addPoint (histoName, x, y); } ///< for monitoring
828  void plot (std::string histoName, std::string options, int pad, EColor color) { if (fMonitoring) fMonitoring->plot (histoName, options, pad, color); } ///< for monitoring
829  void clear (std::string histoName) { if (fMonitoring) fMonitoring->clear (histoName); } ///< for monitoring
830  bool exists (std::string histoName) { if (fMonitoring) return fMonitoring->exists (histoName); return false; } ///< for monitoring
831 
832  size_t convergenceCount () const { return m_convergenceCount; } ///< returns the current convergence count
833  size_t maxConvergenceCount () const { return m_maxConvergenceCount; } ///< returns the max convergence count so far
834  size_t minError () const { return m_minError; } ///< returns the smallest error so far
835 
836  public:
837  Timer m_timer; ///< timer for monitoring
838  double m_minProgress; ///< current limits for the progress bar
839  double m_maxProgress; ///< current limits for the progress bar
840 
841 
842  size_t m_convergenceSteps; ///< number of steps without improvement to consider the DNN to have converged
843  size_t m_batchSize; ///< mini-batch size
846 
847  size_t count_E;
848  size_t count_dE;
849  size_t count_mb_E;
850  size_t count_mb_dE;
851 
853 
855  std::vector<double> m_dropOut;
856 
858  double fMomentum;
861 
864  double m_minError;
865 
866 
867  protected:
869 
870  std::shared_ptr<Monitoring> fMonitoring;
871  };
872 
873 
874 
875 
876 
877 
878 
879 
880 
881 
882 
883 
884 
885 
886 
887 
888 
889 
890 
891 
892 
893 
894 
895  /*! \brief Settings for classification
896  *
897  * contains additional settings if the DNN problem is classification
898  */
900  {
901  public:
902  /*! \brief c'tor
903  *
904  *
905  */
907  size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
908  double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE,
909  size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest,
910  double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3,
911  bool _useMultithreading = true)
912  : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay,
913  _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading)
914  , m_ams ()
915  , m_sumOfSigWeights (0)
916  , m_sumOfBkgWeights (0)
917  , m_scaleToNumEvents (_scaleToNumEvents)
918  , m_cutValue (10.0)
920  , m_fileNameResult ()
922  {
923  }
924 
925  /*! \brief d'tor
926  *
927  *
928  */
930  {
931  }
932 
933  void startTrainCycle ();
934  void endTrainCycle (double /*error*/);
935  void testIteration () { if (fMonitoring) fMonitoring->ProcessEvents (); }
936 
937 
938  /* void createHistograms () */
939  /* { */
940  /* std::cout << "is hist ROC existing?" << std::endl; */
941  /* if (m_histROC) */
942  /* { */
943  /* std::cout << "--> yes" << std::endl; */
944  /* fMonitoring->ProcessEvents (); */
945  /* return; */
946  /* } */
947 
948  /* std::cout << "create histograms" << std::endl; */
949  /* TCanvas* canvas = fMonitoring->GetCanvas (); */
950  /* if (canvas) */
951  /* { */
952  /* std::cout << "canvas divide" << std::endl; */
953  /* canvas->cd (); */
954  /* canvas->Divide (2,2); */
955  /* } */
956  /* if (!m_histROC) */
957  /* { */
958  /* m_histROC = new TH2F ("ROC","ROC", 1000, 0, 1.0, 1000, 0, 1.0); m_histROC->SetDirectory (0); */
959  /* m_histROC->SetLineColor (kBlue); */
960  /* } */
961  /* if (!m_histSignificance) */
962  /* { */
963  /* m_histSignificance = new TH2F ("Significance", "Significance", 1000, 0,1.0, 5, 0.0, 2.0); */
964  /* m_histSignificance->SetDirectory (0); */
965  /* m_histSignificance->SetBit (TH1::kCanRebin); */
966  /* m_histROC->SetLineColor (kRed); */
967  /* } */
968  /* if (!m_histError) */
969  /* { */
970  /* m_histError = new TH1F ("Error", "Error", 100, 0, 100); */
971  /* m_histError->SetDirectory (0); */
972  /* m_histError->SetBit (TH1::kCanRebin); */
973  /* m_histROC->SetLineColor (kGreen); */
974  /* } */
975  /* if (!m_histOutputSignal) */
976  /* { */
977  /* m_histOutputSignal = new TH1F ("Signal", "Signal", 100, 0, 1.0); */
978  /* m_histOutputSignal->SetDirectory (0); */
979  /* m_histOutputSignal->SetBit (TH1::kCanRebin); */
980  /* } */
981  /* if (!m_histOutputBackground) */
982  /* { */
983  /* m_histOutputBackground = new TH1F ("Background", "Background", 100, 0, 1.0); */
984  /* m_histOutputBackground->SetDirectory (0); */
985  /* m_histOutputBackground->SetBit (TH1::kCanRebin); */
986  /* } */
987 
988  /* fMonitoring->ProcessEvents (); */
989  /* } */
990 
991  void testSample (double error, double output, double target, double weight);
992 
993  virtual void startTestCycle ();
994  virtual void endTestCycle ();
995 
996 
997  void setWeightSums (double sumOfSigWeights, double sumOfBkgWeights);
998  void setResultComputation (std::string _fileNameNetConfig, std::string _fileNameResult, std::vector<Pattern>* _resultPatternContainer);
999 
1000  std::vector<double> m_input;
1001  std::vector<double> m_output;
1002  std::vector<double> m_targets;
1003  std::vector<double> m_weights;
1004 
1005  std::vector<double> m_ams;
1006  std::vector<double> m_significances;
1007 
1008 
1012 
1013  double m_cutValue;
1014  std::vector<Pattern>* m_pResultPatternContainer;
1015  std::string m_fileNameResult;
1016  std::string m_fileNameNetConfig;
1017 
1018 
1019  /* TH2F* m_histROC; */
1020  /* TH2F* m_histSignificance; */
1021 
1022  /* TH1F* m_histError; */
1023  /* TH1F* m_histOutputSignal; */
1024  /* TH1F* m_histOutputBackground; */
1025  };
1026 
1027 
1028 
1029 
1030 
1031 
1032 
1033  ///< used to distinguish between different function signatures
1034  enum class ModeOutput
1035  {
1036  FETCH
1037  };
1038 
1039  /*! \brief error functions to be chosen from
1040  *
1041  *
1042  */
1044  {
1045  SUMOFSQUARES = 'S',
1046  CROSSENTROPY = 'C',
1048  };
1049 
1050  /*! \brief weight initialization strategies to be chosen from
1051  *
1052  *
1053  */
1055  {
1057  };
1058 
1059 
1060 
1061  /*! \brief neural net
1062  *
1063  * holds the structure of all layers and some data for the whole net
1064  * does not know the layer data though (i.e. values of the nodes and weights)
1065  */
1066  class Net
1067  {
1068  public:
1069 
1070  typedef std::vector<double> container_type;
1071  typedef container_type::iterator iterator_type;
1072  typedef std::pair<iterator_type,iterator_type> begin_end_type;
1073 
1074 
1075  /*! \brief c'tor
1076  *
1077  *
1078  */
1079  Net ()
1081  , m_sizeInput (0)
1082  , m_layers ()
1083  {
1084  }
1085 
 1086  /*! \brief copy c'tor
 1087  *
 1088  *
 1089  */
1090  Net (const Net& other)
1092  , m_sizeInput (other.m_sizeInput)
1093  , m_layers (other.m_layers)
1094  {
1095  }
1096 
1097  void setInputSize (size_t sizeInput) { m_sizeInput = sizeInput; } ///< set the input size of the DNN
1098  void setOutputSize (size_t sizeOutput) { m_sizeOutput = sizeOutput; } ///< set the output size of the DNN
1099  void addLayer (Layer& layer) { m_layers.push_back (layer); } ///< add a layer (layout)
1100  void addLayer (Layer&& layer) { m_layers.push_back (layer); }
1101  void setErrorFunction (ModeErrorFunction eErrorFunction) { m_eErrorFunction = eErrorFunction; } ///< which error function is to be used
1102 
1103  size_t inputSize () const { return m_sizeInput; } ///< input size of the DNN
1104  size_t outputSize () const { return m_sizeOutput; } ///< output size of the DNN
1105 
1106  /*! \brief set the drop out configuration
1107  *
1108  *
1109  */
1110  template <typename WeightsType, typename DropProbabilities>
1111  void dropOutWeightFactor (WeightsType& weights,
1112  const DropProbabilities& drops,
1113  bool inverse = false);
1114 
1115  /*! \brief start the training
1116  *
1117  * \param weights weight vector
1118  * \param trainPattern training pattern
1119  * \param testPattern test pattern
1120  * \param minimizer use this minimizer for training (e.g. SGD)
1121  * \param settings settings used for this training run
1122  */
1123  template <typename Minimizer>
1124  double train (std::vector<double>& weights,
1125  std::vector<Pattern>& trainPattern,
1126  const std::vector<Pattern>& testPattern,
1127  Minimizer& minimizer,
1128  Settings& settings);
1129 
1130  /*! \brief pre-training for future use
1131  *
1132  *
1133  */
1134  template <typename Minimizer>
1135  void preTrain (std::vector<double>& weights,
1136  std::vector<Pattern>& trainPattern,
1137  const std::vector<Pattern>& testPattern,
1138  Minimizer& minimizer, Settings& settings);
1139 
1140 
1141  /*! \brief executes one training cycle
1142  *
 1143  * \param minimizer the minimizer to be used
1144  * \param weights the weight vector to be used
1145  * \param itPatternBegin the pattern to be trained with
 1146  * \param itPatternEnd the pattern to be trained with
1147  * \param settings the settings for the training
1148  * \param dropContainer the configuration for DNN drop-out
1149  */
1150  template <typename Iterator, typename Minimizer>
1151  inline double trainCycle (Minimizer& minimizer, std::vector<double>& weights,
1152  Iterator itPatternBegin, Iterator itPatternEnd,
1153  Settings& settings,
1154  DropContainer& dropContainer);
1155 
1156  size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net
1157  size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net
1158 
1159  template <typename Weights>
1160  std::vector<double> compute (const std::vector<double>& input, const Weights& weights) const; ///< compute the net with the given input and the given weights
1161 
1162  template <typename Weights, typename PassThrough>
1163  double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients
1164 
1165  template <typename Weights, typename PassThrough, typename OutContainer>
1166  double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function
1167 
1168  template <typename Weights, typename Gradients, typename PassThrough>
1169  double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well
1170 
1171  template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1172  double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const;
1173 
1174 
1175  template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1176  std::vector<std::vector<LayerData>> prepareLayerData (LayerContainer& layers,
1177  Batch& batch,
1178  const DropContainer& dropContainer,
1179  ItWeight itWeightBegin,
1180  ItWeight itWeightEnd,
1181  ItGradient itGradientBegin,
1182  ItGradient itGradientEnd,
1183  size_t& totalNumWeights) const;
1184 
1185  template <typename LayerContainer>
1186  void forwardPattern (const LayerContainer& _layers,
1187  std::vector<LayerData>& layerData) const;
1188 
1189 
1190  template <typename LayerContainer, typename LayerPatternContainer>
1191  void forwardBatch (const LayerContainer& _layers,
1192  LayerPatternContainer& layerPatternData,
1193  std::vector<double>& valuesMean,
1194  std::vector<double>& valuesStdDev,
1195  size_t trainFromLayer) const;
1196 
1197  template <typename OutputContainer>
1198  void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const;
1199 
1200  template <typename OutputContainer>
1201  void fetchOutput (const std::vector<LayerData>& layerPatternData, OutputContainer& outputContainer) const;
1202 
1203 
1204  template <typename ItWeight>
1205  std::tuple</*sumError*/double,/*sumWeights*/double> computeError (const Settings& settings,
1206  std::vector<LayerData>& lastLayerData,
1207  Batch& batch,
1208  ItWeight itWeightBegin,
1209  ItWeight itWeightEnd) const;
1210 
1211  template <typename Settings>
1212  void backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1213  const Settings& settings,
1214  size_t trainFromLayer,
1215  size_t totalNumWeights) const;
1216 
1217 
1218 
1219  /*! \brief main NN computation function
1220  *
1221  *
1222  */
1223  template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1224  double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch,
1225  ItWeight itWeightBegin, ItWeight itWeightEnd,
1226  ItGradient itGradientBegin, ItGradient itGradientEnd,
1227  size_t trainFromLayer,
1228  OutContainer& outputContainer, bool fetchOutput) const;
1229 
1230 
1231 
1232  double E ();
1233  void dE ();
1234 
1235 
1236  /*! \brief computes the error of the DNN
1237  *
1238  *
1239  */
1240  template <typename Container, typename ItWeight>
1241  double errorFunction (LayerData& layerData,
1242  Container truth,
1243  ItWeight itWeight,
1244  ItWeight itWeightEnd,
1245  double patternWeight,
1246  double factorWeightDecay,
1247  EnumRegularization eRegularization) const;
1248 
1249 
1250  const std::vector<Layer>& layers () const { return m_layers; } ///< returns the layers (structure)
1251  std::vector<Layer>& layers () { return m_layers; } ///< returns the layers (structure)
1252 
1253  void removeLayer () { m_layers.pop_back (); } ///< remove one layer
1254 
1255 
1256  void clear () ///< clear one layer
1257  {
1258  m_layers.clear ();
1260  }
1261 
1262 
1263  template <typename OutIterator>
1264  void initializeWeights (WeightInitializationStrategy eInitStrategy,
1265  OutIterator itWeight); ///< initialize the weights with the given strategy
1266 
1267  protected:
1268 
1269  void fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const; ///< prepare the drop-out-container (select the nodes which are to be dropped out)
1270 
1271 
1272  private:
1273 
1274  ModeErrorFunction m_eErrorFunction; ///< denotes the error function
1275  size_t m_sizeInput; ///< input size of this DNN
1276  size_t m_sizeOutput; ///< outut size of this DNN
1277  std::vector<Layer> m_layers; ///< layer-structure-data
1278 
1279  protected:
1280  // variables for JsMVA (interactive training in jupyter notebook)
1282  bool * fExitFromTraining = nullptr;
1283  UInt_t *fIPyMaxIter = nullptr, *fIPyCurrentIter = nullptr;
1284 
1285  public:
1286 
1287  // setup ipython interactive variables
1289  fInteractive = fI;
1290  fExitFromTraining = fE;
1291  fIPyMaxIter = M;
1292  fIPyCurrentIter = C;
1293  }
1294  };
1295 
1296 
1297 
1298 
1299 typedef std::tuple<Settings&, Batch&, DropContainer&> pass_through_type;
1300 
1301 
1302 
1303 
1304 
1305 
1306 
1307  } // namespace DNN
1308 } // namespace TMVA
1309 
1310 
1311 // include the implementations (in header file, because they are templated)
1312 #include "TMVA/NeuralNet.icc"
1313 
1314 #endif
1315 
void addPoint(std::string histoName, double x)
for monitoring
Definition: NeuralNet.h:826
void setWeightSums(double sumOfSigWeights, double sumOfBkgWeights)
set the weight sums to be scaled to (preparations for monitoring output)
Definition: NeuralNet.cxx:515
void testIteration()
callback for monitoring and logging
Definition: NeuralNet.h:935
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition: NeuralNet.h:610
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
Definition: NeuralNet.icc:1330
virtual void startTraining()
Definition: NeuralNet.h:800
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition: NeuralNet.h:1274
const_iterator_type weightsBegin() const
returns const iterator to the begin of the weights for this layer
Definition: NeuralNet.h:607
void setInput(const_iterator_type itInputBegin, const_iterator_type itInputEnd)
change the input iterators
Definition: NeuralNet.h:567
void addLayer(Layer &&layer)
Definition: NeuralNet.h:1100
size_t convergenceCount() const
returns the current convergence count
Definition: NeuralNet.h:832
virtual ~ClassificationSettings()
d'tor
Definition: NeuralNet.h:929
virtual void cycle(double progress, TString text)
Definition: NeuralNet.h:804
MinimizerType
< list all the minimizer types
Definition: NeuralNet.h:319
void add(ITERATOR itBegin, ITERATOR itEnd)
Definition: NeuralNet.h:115
double var_corr() const
Definition: NeuralNet.h:135
size_t m_sizeInput
input size of this DNN
Definition: NeuralNet.h:1275
std::vector< Layer > & layers()
returns the layers (structure)
Definition: NeuralNet.h:1251
UInt_t * fIPyMaxIter
Definition: NeuralNet.h:1283
void create(std::string histoName, int bins, double min, double max, int bins2, double min2, double max2)
for monitoring
Definition: NeuralNet.h:825
std::vector< char > DropContainer
Definition: NeuralNet.h:218
void setDropOut(Iterator begin, Iterator end, size_t _dropRepetitions)
set the drop-out configuration (layer-wise)
Definition: NeuralNet.h:764
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
stores the inverse activation function
Definition: NeuralNet.h:702
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:593
bool isFlagSet(T flag, T value)
Definition: NeuralNet.h:211
double m_maxProgress
current limits for the progress bar
Definition: NeuralNet.h:839
Steepest Gradient Descent algorithm (SGD)
Definition: NeuralNet.h:332
ModeOutputValues modeOutputValues() const
get the mode-output-value (direct, probabilities)
Definition: NeuralNet.h:687
double T(double x)
Definition: ChebyshevPol.h:34
void forwardBatch(const LayerContainer &_layers, LayerPatternContainer &layerPatternData, std::vector< double > &valuesMean, std::vector< double > &valuesStdDev, size_t trainFromLayer) const
Definition: NeuralNet.icc:1245
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition: NeuralNet.h:771
std::pair< iterator_type, iterator_type > begin_end_type
Definition: NeuralNet.h:1072
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
Definition: NeuralNet.icc:1225
double m_dropRepetitions
Definition: NeuralNet.h:854
std::vector< std::function< double(double)> > function_container_type
Definition: NeuralNet.h:443
void add(T value, double weight=1.0)
Definition: NeuralNet.h:92
const std::vector< double > & dropFractions() const
Definition: NeuralNet.h:767
Net()
c'tor
Definition: NeuralNet.h:1079
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
Definition: NeuralNet.icc:572
size_t inputSize() const
input size of the DNN
Definition: NeuralNet.h:1103
void setErrorFunction(ModeErrorFunction eErrorFunction)
which error function is to be used
Definition: NeuralNet.h:1101
iterator_type valueGradientsEnd()
returns iterator to the end of the gradients of the node values
Definition: NeuralNet.h:600
std::shared_ptr< Monitoring > fMonitoring
Definition: NeuralNet.h:870
EnumRegularization regularization() const
some regularization of the DNN is turned on?
Definition: NeuralNet.h:818
Basic string class.
Definition: TString.h:131
bool useMultithreading() const
is multithreading turned on?
Definition: NeuralNet.h:820
int Int_t
Definition: RtypesCore.h:41
function_container_type::iterator function_iterator_type
Definition: NeuralNet.h:444
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
Definition: NeuralNet.icc:941
ModeOutputValues operator|(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:187
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition: NeuralNet.h:828
const_iterator_type gradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:606
STL namespace.
iterator_type m_itGradientBegin
iterator to the first gradient of this layer in the gradient vector
Definition: NeuralNet.h:651
bool m_hasGradients
does this layer have gradients (only if in training mode)
Definition: NeuralNet.h:658
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
Definition: NeuralNet.icc:183
ModeOutputValues operator &(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:198
ModeOutputValues m_eModeOutput
stores the output mode (DIRECT, SIGMOID, SOFTMAX)
Definition: NeuralNet.h:660
size_t numWeights(size_t numInputNodes) const
return the number of weights for this layer (fully connected)
Definition: NeuralNet.h:691
void setOutputSize(size_t sizeOutput)
set the output size of the DNN
Definition: NeuralNet.h:1098
virtual void testIteration()
callback for monitoring and logging
Definition: NeuralNet.h:811
bool m_isInputLayer
is this layer an input layer
Definition: NeuralNet.h:656
double momentum() const
get the momentum (e.g. for SGD)
Definition: NeuralNet.h:777
EnumFunction m_activationFunctionType
Definition: NeuralNet.h:708
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
iterator_type valuesEnd()
returns iterator to the end of the (node) values
Definition: NeuralNet.h:588
neural net
Definition: NeuralNet.h:1066
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
std::shared_ptr< std::function< double(double)> > activationFunction() const
fetch the activation function for this layer
Definition: NeuralNet.h:693
double learningRate() const
get the learning rate
Definition: NeuralNet.h:776
size_t m_sizeOutput
output size of this DNN
Definition: NeuralNet.h:1276
size_t m_numNodes
Definition: NeuralNet.h:705
Settings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, TMVA::DNN::EnumRegularization _regularization=TMVA::DNN::EnumRegularization::NONE, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _multithreading=true)
c'tor
Definition: NeuralNet.cxx:232
std::vector< double > m_valueGradients
stores the gradients of the values (nodes)
Definition: NeuralNet.h:645
std::vector< double > m_significances
Definition: NeuralNet.h:1006
virtual void startTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:809
size_t m_convergenceSteps
number of steps without improvement to consider the DNN to have converged
Definition: NeuralNet.h:842
double sqrt(double)
container_type::const_iterator const_iterator_type
Definition: NeuralNet.h:441
MinimizerType fMinimizerType
Definition: NeuralNet.h:860
Double_t x[n]
Definition: legend1.C:17
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
std::vector< double > m_dropOut
Definition: NeuralNet.h:855
const_iterator_type valueGradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:602
std::vector< Layer > m_layers
layer-structure-data
Definition: NeuralNet.h:1277
ModeOutputValues outputMode() const
returns the output mode
Definition: NeuralNet.h:590
void SetIpythonInteractive(IPythonInteractive *fI, bool *fE, UInt_t *M, UInt_t *C)
Definition: NeuralNet.h:1288
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition: NeuralNet.h:609
size_t m_batchSize
mini-batch size
Definition: NeuralNet.h:843
const_iterator_type m_itConstWeightBegin
const iterator to the first weight of this layer in the weight vector
Definition: NeuralNet.h:650
function_container_type::const_iterator const_function_iterator_type
Definition: NeuralNet.h:445
ModeOutputValues m_eModeOutputValues
do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned...
Definition: NeuralNet.h:707
void removeLayer()
remove one layer
Definition: NeuralNet.h:1253
bool hasDropOut() const
has this layer drop-out turned on?
Definition: NeuralNet.h:624
void setInputSize(size_t sizeInput)
set the input size of the DNN
Definition: NeuralNet.h:1097
const_iterator_type m_itInputBegin
iterator to the first of the nodes in the input node vector
Definition: NeuralNet.h:641
bool m_hasDropOut
dropOut is turned on?
Definition: NeuralNet.h:648
ClassificationSettings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, EnumRegularization _regularization=EnumRegularization::NONE, size_t _scaleToNumEvents=0, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _useMultithreading=true)
c'tor
Definition: NeuralNet.h:906
const_iterator_type valueGradientsEnd() const
returns const iterator to the end of the gradients
Definition: NeuralNet.h:603
const_iterator end() const
Definition: NeuralNet.h:244
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition: NeuralNet.h:372
void function(const Char_t *name_, T fun, const Char_t *docstring=0)
Definition: RExports.h:151
void startTrainCycle()
action to be done when the training cycle is started (e.g.
Definition: NeuralNet.cxx:281
const Double_t sigma
double stdDev_corr() const
Definition: NeuralNet.h:143
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition: NeuralNet.h:824
double var() const
Definition: NeuralNet.h:126
EnumFunction activationFunctionType() const
get the activation function type for this layer
Definition: NeuralNet.h:696
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop out configuration
Definition: NeuralNet.icc:652
std::vector< double > m_output
Definition: NeuralNet.h:1001
EColor
Definition: Rtypes.h:63
size_t testRepetitions() const
how often is the test data tested
Definition: NeuralNet.h:773
LayerData(const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput=ModeOutputValues::DIRECT)
c'tor of LayerData
Definition: NeuralNet.cxx:81
virtual ~Settings()
d'tor
Definition: NeuralNet.cxx:261
iterator_type valuesBegin()
returns iterator to the begin of the (node) values
Definition: NeuralNet.h:587
double stdDev() const
Definition: NeuralNet.h:144
void fetchOutput(const LayerData &lastLayerData, OutputContainer &outputContainer) const
Definition: NeuralNet.icc:1300
void setMonitoring(std::shared_ptr< Monitoring > ptrMonitoring)
prepared for monitoring
Definition: NeuralNet.h:769
std::vector< Pattern > * m_pResultPatternContainer
Definition: NeuralNet.h:1014
std::vector< double > m_deltas
stores the deltas for the DNN training
Definition: NeuralNet.h:644
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
Definition: NeuralNet.icc:1483
UInt_t * fIPyCurrentIter
Definition: NeuralNet.h:1283
double m_minProgress
current limits for the progress bar
Definition: NeuralNet.h:838
iterator_type gradientsBegin()
returns iterator to the begin of the gradients
Definition: NeuralNet.h:605
double weights() const
Definition: NeuralNet.h:124
size_t dropRepetitions() const
Definition: NeuralNet.h:766
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition: NeuralNet.h:591
static double C[]
Double_t(* Function)(Double_t)
Definition: Functor.C:4
size_t m_testRepetitions
Definition: NeuralNet.h:844
Layer(size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues=ModeOutputValues::DIRECT)
c'tor for defining a Layer
Definition: NeuralNet.cxx:166
container_type::iterator iterator_type
Definition: NeuralNet.h:440
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition: NeuralNet.h:1250
std::vector< Pattern >::const_iterator const_iterator
Definition: NeuralNet.h:236
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition: NeuralNet.h:793
double factorWeightDecay() const
get the weight-decay factor
Definition: NeuralNet.h:774
ModeOutputValues operator &=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:203
LayerData(LayerData &&other)
move c'tor of LayerData
Definition: NeuralNet.h:540
container_type computeProbabilities() const
compute the probabilities from the node values
Definition: NeuralNet.cxx:140
size_t m_convergenceCount
Definition: NeuralNet.h:862
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradient...
Definition: NeuralNet.icc:1072
std::vector< double > container_type
Definition: NeuralNet.h:438
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max-cross-entropy error function (for mutual exclusive cross-entropy)
Definition: NeuralNet.icc:458
Timer m_timer
timer for monitoring
Definition: NeuralNet.h:837
size_t minError() const
returns the smallest error so far
Definition: NeuralNet.h:834
void endTrainCycle(double)
action to be done when the training cycle is ended (e.g.
Definition: NeuralNet.cxx:296
size_t numNodes() const
return the number of nodes of this layer
Definition: NeuralNet.h:690
unsigned int UInt_t
Definition: RtypesCore.h:42
double m_factorWeightDecay
Definition: NeuralNet.h:845
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out-container (select the nodes which are to be dropped out)
Definition: NeuralNet.cxx:575
LayerData(const LayerData &other)
copy c&#39;tor of LayerData
Definition: NeuralNet.h:517
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:214
virtual void endTestCycle()
action to be done when the test cycle is ended (e.g.
Definition: NeuralNet.cxx:326
void modeOutputValues(ModeOutputValues eModeOutputValues)
set the mode-output-value
Definition: NeuralNet.h:688
double errorFunction(LayerData &layerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const
computes the error of the DNN
Definition: NeuralNet.icc:1592
void pads(int numPads)
preparation for monitoring
Definition: NeuralNet.h:823
Settings for the training of the neural net.
Definition: NeuralNet.h:734
virtual void startTrainCycle()
Definition: NeuralNet.h:787
virtual void drawSample(const std::vector< double > &, const std::vector< double > &, const std::vector< double > &, double)
callback for monitoring and logging
Definition: NeuralNet.h:812
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
fetch the inverse activation function for this layer
Definition: NeuralNet.h:694
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition: NeuralNet.h:1054
ModeErrorFunction
error functions to be chosen from
Definition: NeuralNet.h:1043
std::vector< double > m_localGradients
local gradients for reuse in thread.
Definition: NeuralNet.h:375
Layer defines the layout of a layer.
Definition: NeuralNet.h:674
void preTrain(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
pre-training for future use
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
const_dropout_iterator m_itDropOut
iterator to a container indicating if the corresponding node is to be dropped
Definition: NeuralNet.h:647
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
Definition: NeuralNet.cxx:543
Steepest(double learningRate=1e-4, double momentum=0.5, size_t repetitions=10)
c'tor
Definition: NeuralNet.h:347
bool m_hasWeights
does this layer have weights (it does not if it is the input layer)
Definition: NeuralNet.h:657
std::vector< double > m_ams
Definition: NeuralNet.h:1005
EnumRegularization m_regularization
Definition: NeuralNet.h:852
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition: NeuralNet.h:585
bool * fExitFromTraining
Definition: NeuralNet.h:1282
void clearDropOut()
clear the drop-out-data for this layer
Definition: NeuralNet.h:622
IPythonInteractive * fInteractive
Definition: NeuralNet.h:1281
ModeOutputValues
Definition: NeuralNet.h:177
std::vector< std::vector< LayerData > > prepareLayerData(LayerContainer &layers, Batch &batch, const DropContainer &dropContainer, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t &totalNumWeights) const
Definition: NeuralNet.icc:1113
double gaussDouble(double mean, double sigma)
Definition: NeuralNet.cxx:35
MinimizerType minimizerType() const
which minimizer shall be used (e.g. SGD)
Definition: NeuralNet.h:779
The Batch class encapsulates one mini-batch.
Definition: NeuralNet.h:233
TText * text
std::vector< double > compute(const std::vector< double > &input, const Weights &weights) const
compute the net with the given input and the given weights
Definition: NeuralNet.icc:1039
double m_beta
internal parameter (momentum)
Definition: NeuralNet.h:371
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
Definition: NeuralNet.icc:712
int type
Definition: TGX11.cxx:120
double mean() const
Definition: NeuralNet.h:125
container_type::iterator iterator_type
Definition: NeuralNet.h:1071
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition: NeuralNet.h:833
Double_t y[n]
Definition: legend1.C:17
std::vector< double > m_targets
Definition: NeuralNet.h:1002
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
Definition: NeuralNet.icc:546
size_t numNodes(size_t trainingStartLayer=0) const
returns the number of nodes in this net
Definition: NeuralNet.cxx:559
double uniformDouble(double minValue, double maxValue)
Definition: NeuralNet.cxx:43
std::vector< double > m_values
stores the values of the nodes in this layer
Definition: NeuralNet.h:646
std::vector< double > m_weights
Definition: NeuralNet.h:1003
void setResultComputation(std::string _fileNameNetConfig, std::string _fileNameResult, std::vector< Pattern > *_resultPatternContainer)
preparation for monitoring output
Definition: NeuralNet.cxx:523
const_iterator_type deltasEnd() const
returns const iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:597
size_t m_maxConvergenceCount
Definition: NeuralNet.h:863
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
Definition: NeuralNet.icc:271
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:594
void addLayer(Layer &layer)
add a layer (layout)
Definition: NeuralNet.h:1099
This class is needed by JsMVA, and it's a helper class for tracking errors during the training in Jupyter...
Definition: MethodBase.h:94
create variable transformations
size_t size() const
return the size of the layer
Definition: NeuralNet.h:627
const_iterator_type m_itInputEnd
iterator to the end of the nodes in the input node vector
Definition: NeuralNet.h:642
std::shared_ptr< std::function< double(double)> > m_activationFunction
activation function for this layer
Definition: NeuralNet.h:653
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
inverse activation function for this layer
Definition: NeuralNet.h:654
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
Definition: NeuralNet.icc:1418
virtual void setProgressLimits(double minProgress=0, double maxProgress=100)
Definition: NeuralNet.h:795
void testSample(double error, double output, double target, double weight)
action to be done after the computation of a test sample (e.g.
Definition: NeuralNet.cxx:304
virtual void startTestCycle()
action to be done when the test cycle is started (e.g.
Definition: NeuralNet.cxx:316
size_t size() const
Definition: NeuralNet.h:246
size_t batchSize() const
mini-batch size
Definition: NeuralNet.h:772
virtual void endTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:810
std::vector< double > m_localWeights
local weights for reuse in thread.
Definition: NeuralNet.h:374
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W. Caution:
Definition: Timer.cxx:203
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition: NeuralNet.h:599
const_iterator m_itBegin
iterator denoting the beginning of the batch
Definition: NeuralNet.h:249
std::vector< double > container_type
Definition: NeuralNet.h:1070
void clear()
clear the values and the deltas
Definition: NeuralNet.h:578
Settings for classification; used to distinguish between different function signatures.
Definition: NeuralNet.h:899
void backPropagate(std::vector< std::vector< LayerData >> &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Definition: NeuralNet.icc:1367
const_dropout_iterator dropOut() const
return the begin of the drop-out information
Definition: NeuralNet.h:625
void clear(std::string histoName)
for monitoring
Definition: NeuralNet.h:829
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and logging
Definition: NeuralNet.h:814
const_iterator_type deltasBegin() const
returns const iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:596
const_iterator begin() const
Definition: NeuralNet.h:243
Batch(typename std::vector< Pattern >::const_iterator itBegin, typename std::vector< Pattern >::const_iterator itEnd)
Definition: NeuralNet.h:238
DropContainer::const_iterator const_dropout_iterator
Definition: NeuralNet.h:447
double m_alpha
internal parameter (learningRate)
Definition: NeuralNet.h:370
EnumRegularization
Definition: NeuralNet.h:171
static void output(int code)
Definition: gifencode.c:226
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition: NeuralNet.h:584
RooCmdArg Minimizer(const char *type, const char *alg=0)
void setDropOut(Iterator itDrop)
set the drop-out info for this layer
Definition: NeuralNet.h:616
void addPoint(std::string histoName, double x, double y)
for monitoring
Definition: NeuralNet.h:827
size_t outputSize() const
output size of the DNN
Definition: NeuralNet.h:1104
ModeOutputValues operator|=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:192
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:412
bool exists(std::string histoName)
for monitoring
Definition: NeuralNet.h:830
virtual bool hasConverged(double testError)
has this training converged already?
Definition: NeuralNet.cxx:488
static double Q[]
LayerData holds the data of one layer.
Definition: NeuralNet.h:435
char name[80]
Definition: TGX11.cxx:109
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition: NeuralNet.h:786
Net(const Net &other)
d'tor
Definition: NeuralNet.h:1090
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition: NeuralNet.h:1299
int repetitions() const
how many steps have to be gone until the batch is changed
Definition: NeuralNet.h:778
const_iterator m_itEnd
iterator denoting the end of the batch
Definition: NeuralNet.h:250
int randomInt(int maxValue)
Definition: NeuralNet.cxx:52
std::shared_ptr< std::function< double(double)> > m_activationFunction
stores the activation function
Definition: NeuralNet.h:701
std::vector< double > m_input
Definition: NeuralNet.h:1000