ROOT 6.08/07
Reference Guide
NeuralNet.h
Go to the documentation of this file.
1 /**
2  * @file NeuralNet.h
3  * @author Peter Speckmayer
4  * @version 1.0
5  *
6  * @section LICENSE
7  *
8  *
9  * @section Neural net implementation
10  *
11  * An implementation of a neural net for TMVA. This neural net uses multithreading.
12  *
13  */
14 
15 
16 //////////////////////////////////////////////////////////////////////////
17 //                                                                      //
18 // NeuralNet                                                            //
19 //                                                                      //
20 // A neural net implementation                                          //
21 //                                                                      //
22 //////////////////////////////////////////////////////////////////////////
23 
24 #ifndef TMVA_NEURAL_NET
25 #define TMVA_NEURAL_NET
26 #pragma once
27 
28 #include <map>
29 #include <vector>
30 #include <iostream>
31 #include <fstream>
32 #include <algorithm>
33 #include <iterator>
34 #include <functional>
35 #include <tuple>
36 #include <math.h>
37 #include <cassert>
38 #include <random>
39 #include <thread>
40 #include <future>
41 #include <type_traits>
42 
43 #include "Pattern.h"
44 #include "Monitoring.h"
45 
46 #include "TApplication.h"
47 #include "Timer.h"
48 
49 #include "TH1F.h"
50 #include "TH2F.h"
51 #include "TStyle.h"
52 
53 #include <fenv.h> // turn on or off exceptions for NaN and other numeric exceptions
54 
55 
56 namespace TMVA
57 {
58 
59  class IPythonInteractive;
60 
61  namespace DNN
62  {
63 
64  // double gaussDouble (double mean, double sigma);
65 
66 
67 
68  double gaussDouble (double mean, double sigma);
69  double uniformDouble (double minValue, double maxValue);
70  int randomInt (int maxValue);
71 
72 
73 
74 
75  class MeanVariance
76  {
77  public:
78  MeanVariance()
79  : m_n(0)
80  , m_sumWeights(0)
81  , m_mean(0)
82  , m_squared(0)
83  {}
84 
85  inline void clear()
86  {
87  m_n = 0;
88  m_sumWeights = 0;
89  m_mean = 0;
90  m_squared = 0;
91  }
92 
93  template <typename T>
94  inline void add(T value, double weight = 1.0)
95  {
96  ++m_n; // a value has been added
97 
98  if (m_n == 1) // initialization
99  {
100  m_mean = value;
101  m_squared = 0.0;
102  m_sumWeights = weight;
103  return;
104  }
105 
106  double tmpWeight = m_sumWeights+weight;
107  double Q = value - m_mean;
108 
109  double R = Q*weight/tmpWeight;
110  m_mean += R;
111  m_squared += m_sumWeights*R*Q;
112 
113  m_sumWeights = tmpWeight;
114  }
115 
116  template <typename ITERATOR>
117  inline void add (ITERATOR itBegin, ITERATOR itEnd)
118  {
119  for (ITERATOR it = itBegin; it != itEnd; ++it)
120  add (*it);
121  }
122 
123 
124 
125  inline int count() const { return m_n; }
126  inline double weights() const { if(m_n==0) return 0; return m_sumWeights; }
127  inline double mean() const { if(m_n==0) return 0; return m_mean; }
128  inline double var() const
129  {
130  if(m_n==0)
131  return 0;
132  if (m_squared <= 0)
133  return 0;
134  return (m_squared/m_sumWeights);
135  }
136 
137  inline double var_corr () const
138  {
139  if (m_n <= 1)
140  return var ();
141 
142  return (var()*m_n/(m_n-1)); // unbiased for small sample sizes
143  }
144 
145  inline double stdDev_corr () const { return sqrt( var_corr() ); }
146  inline double stdDev () const { return sqrt( var() ); } // biased for small sample sizes (see stdDev_corr)
147 
148  private:
149  size_t m_n;
150  double m_sumWeights;
151  double m_mean;
152  double m_squared;
153  };
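// Usage sketch for the statistics accumulator above (illustrative only; the
// class name MeanVariance is the one reconstructed above):
//
//     MeanVariance stat;
//     stat.add (1.0);                  // weight defaults to 1.0
//     stat.add (3.0, 2.0);             // value 3.0 entered with weight 2.0
//     double m = stat.mean ();         // weighted mean: (1*1 + 3*2)/(1+2) = 7/3
//     double s = stat.stdDev_corr ();  // bias-corrected standard deviation
//
// add() performs a weighted incremental update (in the style of West, 1979),
// so mean and variance are available at any time without storing the samples.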
154 
155 
156 
157  enum class EnumFunction
158  {
159  ZERO = '0',
160  LINEAR = 'L',
161  TANH = 'T',
162  RELU = 'R',
163  SYMMRELU = 'r',
164  TANHSHIFT = 't',
165  SIGMOID = 's',
166  SOFTSIGN = 'S',
167  GAUSS = 'G',
168  GAUSSCOMPLEMENT = 'C'
169  };
170 
171 
172 
173  enum class EnumRegularization
174  {
175  NONE, L1, L2, L1MAX
176  };
177 
178 
179  enum class ModeOutputValues : int
180  {
181  DIRECT = 0x01,
182  SIGMOID = 0x02,
183  SOFTMAX = 0x04,
184  BATCHNORMALIZATION = 0x08
185  };
186 
187 
188 
189  inline ModeOutputValues operator| (ModeOutputValues lhs, ModeOutputValues rhs)
190  {
191  return ModeOutputValues ((int)lhs | (int)rhs);
192  }
193 
194  inline ModeOutputValues operator|= (ModeOutputValues& lhs, ModeOutputValues rhs)
195  {
196  lhs = ModeOutputValues ((int)lhs | (int)rhs);
197  return lhs;
198  }
199 
200  inline ModeOutputValues operator& (ModeOutputValues lhs, ModeOutputValues rhs)
201  {
202  return ModeOutputValues ((int)lhs & (int)rhs);
203  }
204 
205  inline ModeOutputValues operator&= (ModeOutputValues& lhs, ModeOutputValues rhs)
206  {
207  lhs = ModeOutputValues ((int)lhs & (int)rhs);
208  return lhs;
209  }
210 
211 
212  template <typename T>
213  bool isFlagSet (T flag, T value)
214  {
215  return (int)(value & flag) != 0;
216  }
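// Usage sketch (illustrative): ModeOutputValues is a bit-flag enum, so output
// modes can be combined with the operators above and queried with isFlagSet:
//
//     ModeOutputValues mode = ModeOutputValues::SIGMOID | ModeOutputValues::BATCHNORMALIZATION;
//     bool doSigmoid = isFlagSet (ModeOutputValues::SIGMOID, mode);   // true
//     bool doSoftmax = isFlagSet (ModeOutputValues::SOFTMAX, mode);   // false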
217 
218 
219 
220  class Net;
221 
222 
223 
224 
225 
226 
227 
228  typedef std::vector<char> DropContainer;
229 
230 
231  /*! \brief The Batch class encapsulates one mini-batch
232  *
233  * Holds a const_iterator to the beginning and the end of one batch in a vector of Pattern
234  */
235  class Batch
236  {
237  public:
238  typedef typename std::vector<Pattern>::const_iterator const_iterator;
239 
240  Batch (typename std::vector<Pattern>::const_iterator itBegin, typename std::vector<Pattern>::const_iterator itEnd)
241  : m_itBegin (itBegin)
242  , m_itEnd (itEnd)
243  {}
244 
245  const_iterator begin () const { return m_itBegin; }
246  const_iterator end () const { return m_itEnd; }
247 
248  size_t size () const { return std::distance (begin (), end ()); }
249 
250  private:
251  const_iterator m_itBegin; ///< iterator denoting the beginning of the batch
252  const_iterator m_itEnd; ///< iterator denoting the end of the batch
253  };
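// Usage sketch (illustrative): slicing a pattern vector into mini-batches.
// A Batch stores only the two iterators, so no Pattern is copied:
//
//     std::vector<Pattern> patterns; // filled elsewhere
//     const std::ptrdiff_t batchSize = 10;
//     std::vector<Batch> batches;
//     auto it = patterns.cbegin ();
//     while (it != patterns.cend ())
//     {
//         std::ptrdiff_t n = std::min<std::ptrdiff_t> (batchSize, std::distance (it, patterns.cend ()));
//         batches.emplace_back (it, it + n);
//         it += n;
//     }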
254 
255 
256 
257 
258 
259 
260  template <typename ItSource, typename ItWeight, typename ItTarget>
261  void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd);
262 
263 
264 
265  template <typename ItSource, typename ItWeight, typename ItPrev>
266  void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd);
267 
268 
269 
270 
271 
272  template <typename ItValue, typename ItFunction>
273  void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction);
274 
275 
276  template <typename ItValue, typename ItFunction, typename ItInverseFunction, typename ItGradient>
277  void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, ItGradient itGradient);
278 
279 
280 
281  template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
282  void update (ItSource itSource, ItSource itSourceEnd,
283  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
284  ItTargetGradient itTargetGradientBegin,
285  ItGradient itGradient);
286 
287 
288 
289  template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
290  void update (ItSource itSource, ItSource itSourceEnd,
291  ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
292  ItTargetGradient itTargetGradientBegin,
293  ItGradient itGradient,
294  ItWeight itWeight, double weightDecay);
295 
296 
297 
298  // ----- signature of a minimizer -------------
299  // class Minimizer
300  // {
301  // public:
302 
303  // template <typename Function, typename Variables, typename PassThrough>
304  // double operator() (Function& fnc, Variables& vars, PassThrough& passThrough)
305  // {
306  // // auto itVars = begin (vars);
307  // // auto itVarsEnd = end (vars);
308 
309  // std::vector<double> myweights;
310  // std::vector<double> gradients;
311 
312  // double value = fnc (passThrough, myweights);
313  // value = fnc (passThrough, myweights, gradients);
314  // return value;
315  // }
316  // };
317 
318 
319 
320  ///< list all the minimizer types
321  enum MinimizerType
322  {
323  fSteepest ///< SGD
324  };
325 
326 
327 
328 
329 
330  /*! \brief Steepest Gradient Descent algorithm (SGD)
331  *
332  * Implements a steepest gradient descent minimization algorithm
333  */
334  class Steepest
335  {
336  public:
337 
338  size_t m_repetitions;
339 
340 
341  /*! \brief c'tor
342  *
343  * C'tor
344  *
345  * \param learningRate denotes the learning rate for the SGD algorithm
346  * \param momentum fraction of the velocity which is taken over from the last step
347  * \param repetitions re-compute the gradients each "repetitions" steps
348  */
349  Steepest (double learningRate = 1e-4,
350  double momentum = 0.5,
351  size_t repetitions = 10)
352  : m_repetitions (repetitions)
353  , m_alpha (learningRate)
354  , m_beta (momentum)
355  {}
356 
357  /*! \brief operator to call the steepest gradient descent algorithm
358  *
359  * entry point to start the minimization procedure
360  *
361  * \param fitnessFunction (templated) function which has to be provided. This function is minimized
362  * \param weights (templated) a reference to a container of weights. The result of the minimization procedure
363  * is returned via this reference (needs to support std::begin and std::end)
364  * \param passThrough (templated) object which can hold any data which the fitness function needs. This object
365  * is not touched by the minimizer; This object is provided to the fitness function when
366  * called
367  */
368  template <typename Function, typename Weights, typename PassThrough>
369  double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough);
370 
371 
372  double m_alpha; ///< internal parameter (learningRate)
373  double m_beta; ///< internal parameter (momentum)
374  std::vector<double> m_prevGradients; ///< vector remembers the gradients of the previous step
375 
376  std::vector<double> m_localWeights; ///< local weights for reuse in thread.
377  std::vector<double> m_localGradients; ///< local gradients for reuse in thread.
378  };
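// Schematic of the update rule Steepest applies (the authoritative code is in
// NeuralNet.icc); per weight w with gradient g = dE/dw:
//
//     step_t = m_alpha * g + m_beta * step_{t-1}   // momentum-weighted step
//     w      = w - step_t
//
// m_prevGradients keeps step_{t-1} between iterations, and the gradients are
// re-computed every m_repetitions steps (see the c'tor documentation above).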
379 
380 
381 
382 
383 
384 
385 
386 
387 
388 
389 
390 
391 
392 
393 
394 
395 
396 
397  template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
398  double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
399 
400 
401 
402  template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
403  double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
404 
405 
406 
407 
408  template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
409  double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
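// The three error functions declared above, written out per pattern
// (schematic; the implementations in NeuralNet.icc additionally fill the
// deltas for back-propagation and apply patternWeight):
//
//     sumOfSquares:        E = sum_k (output_k - truth_k)^2   (up to a conventional factor 1/2)
//     crossEntropy:        E = - sum_k [ truth_k*log(output_k) + (1-truth_k)*log(1-output_k) ]
//     softMaxCrossEntropy: E = - sum_k truth_k * log(p_k),  p_k = exp(output_k) / sum_j exp(output_j)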
410 
411 
412 
413 
414 
415  template <typename ItWeight>
416  double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization);
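// Schematic of the regularization term added by weightDecay (implementation
// in NeuralNet.icc; normalization conventions follow the implementation):
//
//     L1:  error += factorWeightDecay * sum_i |w_i|
//     L2:  error += factorWeightDecay * sum_i w_i^2
//
// with the sum running over [itWeight, itWeightEnd); for
// EnumRegularization::NONE the error is returned unchanged.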
417 
418 
419 
420 
421 
422 
423 
424 
425 
426 
427 
428 
429 
430 
431  /*! \brief LayerData holds the data of one layer
432  *
433  * LayerData holds the data of one layer, but not its layout
434  *
435  *
436  */
437  class LayerData
438  {
439  public:
440  typedef std::vector<double> container_type;
441 
442  typedef container_type::iterator iterator_type;
443  typedef container_type::const_iterator const_iterator_type;
444 
445  typedef std::vector<std::function<double(double)> > function_container_type;
446  typedef function_container_type::iterator function_iterator_type;
447  typedef function_container_type::const_iterator const_function_iterator_type;
448 
449  typedef DropContainer::const_iterator const_dropout_iterator;
450 
451  /*! \brief c'tor of LayerData
452  *
453  * C'tor of LayerData for the input layer
454  *
455  * \param itInputBegin iterator to the begin of a vector which holds the values of the nodes of the neural net
456  * \param itInputEnd iterator to the end of a vector which holds the values of the nodes of the neural net
457  * \param eModeOutput indicates a potential transformation of the output values before further computation
458  * DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
459  * output value (to create a probability); SOFTMAX applies a softmax transformation to all
460  * output values (mutually exclusive probability)
461  */
462  LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);
463 
464 
465  /*! \brief c'tor of LayerData
466  *
467  * C'tor of LayerData for the input layer
468  *
469  * \param inputSize input size of this layer
470  */
471  LayerData (size_t inputSize);
472  ~LayerData ()    {}
473 
474 
475  /*! \brief c'tor of LayerData
476  *
477  * C'tor of LayerData for all layers which are not the input layer; Used during the training of the DNN
478  *
479  * \param size size of the layer
480  * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
481  * \param itGradientBegin indicates the start of the gradients for this layer on the gradient vector
482  * \param activationFunction activation function to be applied to the nodes of
483  * this layer
484  * \param inverseActivationFunction inverse activation function for this layer,
485  * used during back-propagation
486  * \param eModeOutput indicates a potential transformation of the output values before further computation
487  * DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
488  * output value (to create a probability); SOFTMAX applies a softmax transformation to all
489  * output values (mutually exclusive probability)
490  */
491  LayerData (size_t size,
492  const_iterator_type itWeightBegin,
493  iterator_type itGradientBegin,
494  std::shared_ptr<std::function<double(double)>> activationFunction,
495  std::shared_ptr<std::function<double(double)>> inverseActivationFunction,
496  ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);
497 
498  /*! \brief c'tor of LayerData
499  *
500  * C'tor of LayerData for all layers which are not the input layer; Used during the application of the DNN
501  *
502  * \param size size of the layer
503  * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
504  * \param activationFunction activation function to be applied to the nodes of
505  * this layer
506  * \param eModeOutput indicates a potential transformation of the output values before further computation
507  * DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
508  * output value (to create a probability); SOFTMAX applies a softmax transformation to all
509  * output values (mutually exclusive probability)
510  */
511  LayerData (size_t size, const_iterator_type itWeightBegin,
512  std::shared_ptr<std::function<double(double)>> activationFunction,
513  ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);
514 
515  /*! \brief copy c'tor of LayerData
516  *
517  *
518  */
519  LayerData (const LayerData& other)
520  : m_size (other.m_size)
521  , m_itInputBegin (other.m_itInputBegin)
522  , m_itInputEnd (other.m_itInputEnd)
523  , m_deltas (other.m_deltas)
524  , m_valueGradients (other.m_valueGradients)
525  , m_values (other.m_values)
526  , m_itDropOut (other.m_itDropOut)
527  , m_hasDropOut (other.m_hasDropOut)
528  , m_itConstWeightBegin (other.m_itConstWeightBegin)
529  , m_itGradientBegin (other.m_itGradientBegin)
530  , m_activationFunction (other.m_activationFunction)
531  , m_inverseActivationFunction (other.m_inverseActivationFunction)
532  , m_isInputLayer (other.m_isInputLayer)
533  , m_hasWeights (other.m_hasWeights)
534  , m_hasGradients (other.m_hasGradients)
535  , m_eModeOutput (other.m_eModeOutput)
536  {}
537 
538  /*! \brief move c'tor of LayerData
539  *
540  *
541  */
542  LayerData (LayerData&& other)
543  : m_size (other.m_size)
544  , m_itInputBegin (other.m_itInputBegin)
545  , m_itInputEnd (other.m_itInputEnd)
546  , m_deltas (std::move(other.m_deltas))
547  , m_valueGradients (std::move(other.m_valueGradients))
548  , m_values (std::move(other.m_values))
549  , m_itDropOut (other.m_itDropOut)
550  , m_hasDropOut (other.m_hasDropOut)
551  , m_itConstWeightBegin (other.m_itConstWeightBegin)
552  , m_itGradientBegin (other.m_itGradientBegin)
553  , m_activationFunction (std::move(other.m_activationFunction))
554  , m_inverseActivationFunction (std::move(other.m_inverseActivationFunction))
555  , m_isInputLayer (other.m_isInputLayer)
556  , m_hasWeights (other.m_hasWeights)
557  , m_hasGradients (other.m_hasGradients)
558  , m_eModeOutput (other.m_eModeOutput)
559  {}
560 
561 
562  /*! \brief change the input iterators
563  *
564  *
565  * \param itInputBegin indicates the start of the input node vector
566  * \param itInputEnd indicates the end of the input node vector
567  *
568  */
569  void setInput (const_iterator_type itInputBegin, const_iterator_type itInputEnd)
570  {
571  m_isInputLayer = true;
572  m_itInputBegin = itInputBegin;
573  m_itInputEnd = itInputEnd;
574  }
575 
576  /*! \brief clear the values and the deltas
577  *
578  *
579  */
580  void clear ()
581  {
582  m_values.assign (m_values.size (), 0.0);
583  m_deltas.assign (m_deltas.size (), 0.0);
584  }
585 
586  const_iterator_type valuesBegin () const { return m_isInputLayer ? m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values
587  const_iterator_type valuesEnd () const { return m_isInputLayer ? m_itInputEnd : end (m_values); } ///< returns const iterator to the end of the (node) values
588 
589  iterator_type valuesBegin () { assert (!m_isInputLayer); return begin (m_values); } ///< returns iterator to the begin of the (node) values
590  iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); } ///< returns iterator to the end of the (node) values
591 
592  ModeOutputValues outputMode () const { return m_eModeOutput; } ///< returns the output mode
593  container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them
594 
595  iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation)
596  iterator_type deltasEnd () { return end (m_deltas); } ///< returns iterator to the end of the deltas (back-propagation)
597 
598  const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation)
599  const_iterator_type deltasEnd () const { return end (m_deltas); } ///< returns const iterator to the end of the deltas (back-propagation)
600 
601  iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values
602  iterator_type valueGradientsEnd () { return end (m_valueGradients); } ///< returns iterator to the end of the gradients of the node values
603 
604  const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients
605  const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); } ///< returns const iterator to the end of the gradients
606 
607  iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients
608  const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients
609  const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer
610 
611  std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }
612  std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; }
613 
614  /*! \brief set the drop-out info for this layer
615  *
616  */
617  template <typename Iterator>
618  void setDropOut (Iterator itDrop) { m_itDropOut = itDrop; m_hasDropOut = true; }
619 
620  /*! \brief clear the drop-out-data for this layer
621  *
622  *
623  */
624  void clearDropOut () { m_hasDropOut = false; }
625 
626  bool hasDropOut () const { return m_hasDropOut; } ///< is drop-out turned on for this layer?
627  const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; } ///< return the begin of the drop-out information
628 
629  size_t size () const { return m_size; } ///< return the size of the layer
630 
631  private:
632 
633  /*! \brief compute the probabilities from the node values
634  *
635  *
636  */
637  container_type computeProbabilities () const;
638 
639  private:
640 
641  size_t m_size; ///< layer size
642 
643  const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector
644  const_iterator_type m_itInputEnd; ///< iterator to the end of the nodes in the input node vector
645 
646  std::vector<double> m_deltas; ///< stores the deltas for the DNN training
647  std::vector<double> m_valueGradients; ///< stores the gradients of the values (nodes)
648  std::vector<double> m_values; ///< stores the values of the nodes in this layer
649  const_dropout_iterator m_itDropOut; ///< iterator to a container indicating if the corresponding node is to be dropped
650  bool m_hasDropOut; ///< dropOut is turned on?
651 
652  const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector
653  iterator_type m_itGradientBegin; ///< iterator to the first gradient of this layer in the gradient vector
654 
655  std::shared_ptr<std::function<double(double)>> m_activationFunction; ///< activation function for this layer
656  std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< inverse activation function for this layer
657 
658  bool m_isInputLayer; ///< is this layer an input layer
659  bool m_hasWeights; ///< does this layer have weights (it does not if it is the input layer)
660  bool m_hasGradients; ///< does this layer have gradients (only if in training mode)
661 
662  ModeOutputValues m_eModeOutput; ///< stores the output mode (DIRECT, SIGMOID, SOFTMAX)
663 
664  };
665 
666 
667 
668 
669 
670  /*! \brief Layer defines the layout of a layer
671  *
672  * Layer defines the layout of a specific layer in the DNN
673  * Objects of this class don't hold the layer data itself (see class "LayerData")
674  *
675  */
676  class Layer
677  {
678  public:
679 
680  /*! \brief c'tor for defining a Layer
681  *
682  *
683  * \param numNodes number of nodes of this layer
684  * \param activationFunction activation function to be used on the nodes of this layer
685  * \param eModeOutputValues indicates whether the output values are transformed before being returned (e.g. to probabilities)
686  */
687  Layer (size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues = ModeOutputValues::DIRECT);
688 
689  ModeOutputValues modeOutputValues () const { return m_eModeOutputValues; } ///< get the mode-output-value (direct, probabilities)
690  void modeOutputValues (ModeOutputValues eModeOutputValues) { m_eModeOutputValues = eModeOutputValues; } ///< set the mode-output-value
691 
692  size_t numNodes () const { return m_numNodes; } ///< return the number of nodes of this layer
693  size_t numWeights (size_t numInputNodes) const { return numInputNodes * numNodes (); } ///< return the number of weights for this layer (fully connected)
694 
695  std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; } ///< fetch the activation function for this layer
696  std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; } ///< fetch the inverse activation function for this layer
697 
698  EnumFunction activationFunctionType () const { return m_activationFunctionType; } ///< get the activation function type for this layer
699 
700  private:
701 
702 
703  std::shared_ptr<std::function<double(double)>> m_activationFunction; ///< stores the activation function
704  std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< stores the inverse activation function
705 
706 
707  size_t m_numNodes;
708 
709  ModeOutputValues m_eModeOutputValues; ///< do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned as such
710  EnumFunction m_activationFunctionType;
711 
712  friend class Net;
713  };
714 
715 
716 
717 
718 
719  template <typename LAYERDATA>
720  void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData);
721 
722 
723  template <typename LAYERDATA>
724  void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData);
725 
726 
727  template <typename LAYERDATA>
728  void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization);
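// What the three layer-level helpers above compute for a fully connected
// layer (schematic; implementations in NeuralNet.icc):
//
//     forward:   value_j  = sum_i w_ij * value_i    // activation function applied afterwards
//     backward:  delta_i  = sum_j w_ij * delta_j    // back-propagation of the error
//     update:    grad_ij += value_i * delta_j       // plus the regularization term, if any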
729 
730 
731 
732  /*! \brief Settings for the training of the neural net
733  *
734  *
735  */
736  class Settings
737  {
738  public:
739 
740  /*! \brief c'tor
741  *
742  *
743  */
744  Settings (TString name,
745  size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
746  double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE,
747  MinimizerType _eMinimizerType = MinimizerType::fSteepest,
748  double _learningRate = 1e-5, double _momentum = 0.3,
749  int _repetitions = 3,
750  bool _multithreading = true);
751 
752  /*! \brief d'tor
753  *
754  *
755  */
756  virtual ~Settings ();
757 
758 
759  /*! \brief set the drop-out configuration (layer-wise)
760  *
761  * \param begin begin of an array or vector denoting the drop-out probabilities for each layer
762  * \param end end of an array or vector denoting the drop-out probabilities for each layer
763  * \param _dropRepetitions denotes after how many repetitions the drop-out setting (which nodes are dropped out exactly) is changed
764  */
765  template <typename Iterator>
766  void setDropOut (Iterator begin, Iterator end, size_t _dropRepetitions) { m_dropOut.assign (begin, end); m_dropRepetitions = _dropRepetitions; }
767 
768  size_t dropRepetitions () const { return m_dropRepetitions; }
769  const std::vector<double>& dropFractions () const { return m_dropOut; }
770 
771  void setMonitoring (std::shared_ptr<Monitoring> ptrMonitoring) { fMonitoring = ptrMonitoring; } ///< prepared for monitoring
772 
773  size_t convergenceSteps () const { return m_convergenceSteps; } ///< how many steps until training is deemed to have converged
774  size_t batchSize () const { return m_batchSize; } ///< mini-batch size
775  size_t testRepetitions () const { return m_testRepetitions; } ///< how often is the test data tested
776  double factorWeightDecay () const { return m_factorWeightDecay; } ///< get the weight-decay factor
777 
778  double learningRate () const { return fLearningRate; } ///< get the learning rate
779  double momentum () const { return fMomentum; } ///< get the momentum (e.g. for SGD)
780  int repetitions () const { return fRepetitions; } ///< how many steps have to be gone until the batch is changed
781  MinimizerType minimizerType () const { return fMinimizerType; } ///< which minimizer shall be used (e.g. SGD)
782 
783 
784 
785 
786 
787 
788  virtual void testSample (double /*error*/, double /*output*/, double /*target*/, double /*weight*/) {} ///< virtual function to be used for monitoring (callback)
789  virtual void startTrainCycle () ///< callback for monitoring and logging
790  {
791  m_convergenceCount = 0;
792  m_maxConvergenceCount= 0;
793  m_minError = 1e10;
794  }
795  virtual void endTrainCycle (double /*error*/) {} ///< callback for monitoring and logging
796 
797  virtual void setProgressLimits (double minProgress = 0, double maxProgress = 100) ///< for monitoring and logging (set the current "progress" limits for the display of the progress)
798  {
799  m_minProgress = minProgress;
800  m_maxProgress = maxProgress;
801  }
802  virtual void startTraining () ///< start drawing the progress bar
803  {
804  m_timer.DrawProgressBar (Int_t(m_minProgress));
805  }
806  virtual void cycle (double progress, TString text) ///< advance on the progress bar
807  {
808  m_timer.DrawProgressBar (Int_t(m_minProgress+(m_maxProgress-m_minProgress)*(progress/100.0)), text);
809  }
810 
811  virtual void startTestCycle () {} ///< callback for monitoring and logging
812  virtual void endTestCycle () {} ///< callback for monitoring and logging
813  virtual void testIteration () {} ///< callback for monitoring and logging
814  virtual void drawSample (const std::vector<double>& /*input*/, const std::vector<double>& /* output */, const std::vector<double>& /* target */, double /* patternWeight */) {} ///< callback for monitoring and logging
815 
816  virtual void computeResult (const Net& /* net */, std::vector<double>& /* weights */) {} ///< callback for monitoring and logging
817 
818  virtual bool hasConverged (double testError); ///< has this training converged already?
819 
820  EnumRegularization regularization () const { return m_regularization; } ///< returns the regularization mode (NONE, L1, L2, L1MAX)
821 
822  bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on?
823 
824 
825  void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring
826  void create (std::string histoName, int bins, double min, double max) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max); } ///< for monitoring
827  void create (std::string histoName, int bins, double min, double max, int bins2, double min2, double max2) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max, bins2, min2, max2); } ///< for monitoring
828  void addPoint (std::string histoName, double x) { if (fMonitoring) fMonitoring->addPoint (histoName, x); } ///< for monitoring
829  void addPoint (std::string histoName, double x, double y) {if (fMonitoring) fMonitoring->addPoint (histoName, x, y); } ///< for monitoring
830  void plot (std::string histoName, std::string options, int pad, EColor color) { if (fMonitoring) fMonitoring->plot (histoName, options, pad, color); } ///< for monitoring
831  void clear (std::string histoName) { if (fMonitoring) fMonitoring->clear (histoName); } ///< for monitoring
832  bool exists (std::string histoName) { if (fMonitoring) return fMonitoring->exists (histoName); return false; } ///< for monitoring
833 
834  size_t convergenceCount () const { return m_convergenceCount; } ///< returns the current convergence count
835  size_t maxConvergenceCount () const { return m_maxConvergenceCount; } ///< returns the max convergence count so far
836  double minError () const { return m_minError; } ///< returns the smallest error so far
837 
838  public:
839  Timer m_timer; ///< timer for monitoring
840  double m_minProgress; ///< current limits for the progress bar
841  double m_maxProgress; ///< current limits for the progress bar
842 
843 
844  size_t m_convergenceSteps; ///< number of steps without improvement to consider the DNN to have converged
845  size_t m_batchSize; ///< mini-batch size
846  size_t m_testRepetitions;
847  double m_factorWeightDecay;
848 
849  size_t count_E;
850  size_t count_dE;
851  size_t count_mb_E;
852  size_t count_mb_dE;
853 
854  EnumRegularization m_regularization;
855 
856  double m_dropRepetitions;
857  std::vector<double> m_dropOut;
858 
859  double fLearningRate;
860  double fMomentum;
861  int fRepetitions;
862  MinimizerType fMinimizerType;
863 
864  size_t m_convergenceCount;
865  size_t m_maxConvergenceCount;
866  double m_minError;
867 
868 
869  protected:
870  bool m_useMultithreading;
871 
872  std::shared_ptr<Monitoring> fMonitoring;
873  };
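// Usage sketch (illustrative values): a Settings object for a training run
// with L2 regularization and the SGD minimizer:
//
//     Settings settings (/*name*/ "myTraining",
//                        /*convergenceSteps*/ 15, /*batchSize*/ 30,
//                        /*testRepetitions*/ 7, /*factorWeightDecay*/ 1e-5,
//                        EnumRegularization::L2, MinimizerType::fSteepest,
//                        /*learningRate*/ 1e-4, /*momentum*/ 0.3,
//                        /*repetitions*/ 3, /*multithreading*/ true);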
874 
875 
876 
877 
878 
879 
880 
881 
882 
883 
884 
885 
886 
887 
888 
889 
890 
891 
892 
893 
894 
895 
896 
897  /*! \brief Settings for classification
898  *
899  * contains additional settings if the DNN problem is classification
900  */
901  class ClassificationSettings : public Settings
902  {
903  public:
904  /*! \brief c'tor
905  *
906  *
907  */
908  ClassificationSettings (TString name,
909  size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
910  double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE,
911  size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest,
912  double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3,
913  bool _useMultithreading = true)
914  : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay,
915  _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading)
916  , m_ams ()
917  , m_sumOfSigWeights (0)
918  , m_sumOfBkgWeights (0)
919  , m_scaleToNumEvents (_scaleToNumEvents)
920  , m_cutValue (10.0)
921  , m_pResultPatternContainer (NULL)
922  , m_fileNameResult ()
923  , m_fileNameNetConfig ()
924  {
925  }
926 
927  /*! \brief d'tor
928  *
929  *
930  */
931  virtual ~ClassificationSettings ()
932  {
933  }
934 
935  void startTrainCycle ();
936  void endTrainCycle (double /*error*/);
937  void testIteration () { if (fMonitoring) fMonitoring->ProcessEvents (); }
938 
939 
940  /* void createHistograms () */
941  /* { */
942  /* std::cout << "is hist ROC existing?" << std::endl; */
943  /* if (m_histROC) */
944  /* { */
945  /* std::cout << "--> yes" << std::endl; */
946  /* fMonitoring->ProcessEvents (); */
947  /* return; */
948  /* } */
949 
950  /* std::cout << "create histograms" << std::endl; */
951  /* TCanvas* canvas = fMonitoring->GetCanvas (); */
952  /* if (canvas) */
953  /* { */
954  /* std::cout << "canvas divide" << std::endl; */
955  /* canvas->cd (); */
956  /* canvas->Divide (2,2); */
957  /* } */
958  /* if (!m_histROC) */
959  /* { */
960  /* m_histROC = new TH2F ("ROC","ROC", 1000, 0, 1.0, 1000, 0, 1.0); m_histROC->SetDirectory (0); */
961  /* m_histROC->SetLineColor (kBlue); */
962  /* } */
963  /* if (!m_histSignificance) */
964  /* { */
965  /* m_histSignificance = new TH2F ("Significance", "Significance", 1000, 0,1.0, 5, 0.0, 2.0); */
966  /* m_histSignificance->SetDirectory (0); */
967  /* m_histSignificance->SetBit (TH1::kCanRebin); */
968  /* m_histROC->SetLineColor (kRed); */
969  /* } */
970  /* if (!m_histError) */
971  /* { */
972  /* m_histError = new TH1F ("Error", "Error", 100, 0, 100); */
973  /* m_histError->SetDirectory (0); */
974  /* m_histError->SetBit (TH1::kCanRebin); */
975  /* m_histROC->SetLineColor (kGreen); */
976  /* } */
977  /* if (!m_histOutputSignal) */
978  /* { */
979  /* m_histOutputSignal = new TH1F ("Signal", "Signal", 100, 0, 1.0); */
980  /* m_histOutputSignal->SetDirectory (0); */
981  /* m_histOutputSignal->SetBit (TH1::kCanRebin); */
982  /* } */
983  /* if (!m_histOutputBackground) */
984  /* { */
985  /* m_histOutputBackground = new TH1F ("Background", "Background", 100, 0, 1.0); */
986  /* m_histOutputBackground->SetDirectory (0); */
987  /* m_histOutputBackground->SetBit (TH1::kCanRebin); */
988  /* } */
989 
990  /* fMonitoring->ProcessEvents (); */
991  /* } */
992 
993  void testSample (double error, double output, double target, double weight);
994 
995  virtual void startTestCycle ();
996  virtual void endTestCycle ();
997 
998 
999  void setWeightSums (double sumOfSigWeights, double sumOfBkgWeights);
1000  void setResultComputation (std::string _fileNameNetConfig, std::string _fileNameResult, std::vector<Pattern>* _resultPatternContainer);
1001 
1002  std::vector<double> m_input;
1003  std::vector<double> m_output;
1004  std::vector<double> m_targets;
1005  std::vector<double> m_weights;
1006 
1007  std::vector<double> m_ams;
1008  std::vector<double> m_significances;
1009 
1010 
1011  double m_sumOfSigWeights;
1012  double m_sumOfBkgWeights;
1013  size_t m_scaleToNumEvents;
1014 
1015  double m_cutValue;
1016  std::vector<Pattern>* m_pResultPatternContainer;
1017  std::string m_fileNameResult;
1018  std::string m_fileNameNetConfig;
1019 
1020 
1021  /* TH2F* m_histROC; */
1022  /* TH2F* m_histSignificance; */
1023 
1024  /* TH1F* m_histError; */
1025  /* TH1F* m_histOutputSignal; */
1026  /* TH1F* m_histOutputBackground; */
1027  };
1028 
1029 
1030 
1031 
1032 
1033 
1034 
1035  ///< used to distinguish between different function signatures
1036  enum class ModeOutput
1037  {
1038  FETCH
1039  };
1040 
1041  /*! \brief error functions to be chosen from
1042  *
1043  *
1044  */
1045  enum class ModeErrorFunction
1046  {
1047  SUMOFSQUARES = 'S',
1048  CROSSENTROPY = 'C',
1049  CROSSENTROPY_MUTUALEXCLUSIVE = 'M'
1050  };
1051 
1052  /*! \brief weight initialization strategies to be chosen from
1053  *
1054  *
1055  */
1056  enum class WeightInitializationStrategy
1057  {
1058  XAVIER, TEST, LAYERSIZE, XAVIERUNIFORM
1059  };
1060 
1061 
1062 
1063  /*! \brief neural net
1064  *
1065  * holds the structure of all layers and some data for the whole net
1066  * does not know the layer data though (i.e. values of the nodes and weights)
1067  */
1068  class Net
1069  {
1070  public:
1071 
1072  typedef std::vector<double> container_type;
1073  typedef container_type::iterator iterator_type;
1074  typedef std::pair<iterator_type,iterator_type> begin_end_type;
1075 
1076 
1077  /*! \brief c'tor
1078  *
1079  *
1080  */
1081  Net ()
1082  : m_eErrorFunction (ModeErrorFunction::SUMOFSQUARES)
1083  , m_sizeInput (0)
1084  , m_layers ()
1085  {
1086  }
1087 
1088  /*! \brief copy c'tor
1089  *
1090  *
1091  */
1092  Net (const Net& other)
1093  : m_eErrorFunction (other.m_eErrorFunction)
1094  , m_sizeInput (other.m_sizeInput)
1095  , m_layers (other.m_layers)
1096  {
1097  }
1098 
1099  void setInputSize (size_t sizeInput) { m_sizeInput = sizeInput; } ///< set the input size of the DNN
1100  void setOutputSize (size_t sizeOutput) { m_sizeOutput = sizeOutput; } ///< set the output size of the DNN
1101  void addLayer (Layer& layer) { m_layers.push_back (layer); } ///< add a layer (layout)
1102  void addLayer (Layer&& layer) { m_layers.push_back (std::move (layer)); }
1103  void setErrorFunction (ModeErrorFunction eErrorFunction) { m_eErrorFunction = eErrorFunction; } ///< which error function is to be used
1104 
1105  size_t inputSize () const { return m_sizeInput; } ///< input size of the DNN
1106  size_t outputSize () const { return m_sizeOutput; } ///< output size of the DNN
1107 
1108  /*! \brief scale the weights according to the drop-out probabilities
1109  *
1110  *
1111  */
1112  template <typename WeightsType, typename DropProbabilities>
1113  void dropOutWeightFactor (WeightsType& weights,
1114  const DropProbabilities& drops,
1115  bool inverse = false);
1116 
1117  /*! \brief start the training
1118  *
1119  * \param weights weight vector
1120  * \param trainPattern training pattern
1121  * \param testPattern test pattern
1122  * \param minimizer use this minimizer for training (e.g. SGD)
1123  * \param settings settings used for this training run
1124  */
1125  template <typename Minimizer>
1126  double train (std::vector<double>& weights,
1127  std::vector<Pattern>& trainPattern,
1128  const std::vector<Pattern>& testPattern,
1129  Minimizer& minimizer,
1130  Settings& settings);
1131 
1132  /*! \brief pre-training for future use
1133  *
1134  *
1135  */
1136  template <typename Minimizer>
1137  void preTrain (std::vector<double>& weights,
1138  std::vector<Pattern>& trainPattern,
1139  const std::vector<Pattern>& testPattern,
1140  Minimizer& minimizer, Settings& settings);
1141 
1142 
1143  /*! \brief executes one training cycle
1144  *
1145  * \param minimizer the minimizer to be used
1146  * \param weights the weight vector to be used
1147  * \param itPatternBegin begin of the patterns to be trained with
1148  * \param itPatternEnd end of the patterns to be trained with
1149  * \param settings the settings for the training
1150  * \param dropContainer the configuration for DNN drop-out
1151  */
1152  template <typename Iterator, typename Minimizer>
1153  inline double trainCycle (Minimizer& minimizer, std::vector<double>& weights,
1154  Iterator itPatternBegin, Iterator itPatternEnd,
1155  Settings& settings,
1156  DropContainer& dropContainer);
1157 
1158  size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net
1159  size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net
1160 
1161  template <typename Weights>
1162  std::vector<double> compute (const std::vector<double>& input, const Weights& weights) const; ///< compute the net with the given input and the given weights
1163 
1164  template <typename Weights, typename PassThrough>
1165  double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients
1166 
1167  template <typename Weights, typename PassThrough, typename OutContainer>
1168  double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function
1169 
1170  template <typename Weights, typename Gradients, typename PassThrough>
1171  double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well
1172 
1173  template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1174  double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const;
1175 
1176 
1177  template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1178  std::vector<std::vector<LayerData>> prepareLayerData (LayerContainer& layers,
1179  Batch& batch,
1180  const DropContainer& dropContainer,
1181  ItWeight itWeightBegin,
1182  ItWeight itWeightEnd,
1183  ItGradient itGradientBegin,
1184  ItGradient itGradientEnd,
1185  size_t& totalNumWeights) const;
1186 
1187  template <typename LayerContainer>
1188  void forwardPattern (const LayerContainer& _layers,
1189  std::vector<LayerData>& layerData) const;
1190 
1191 
1192  template <typename LayerContainer, typename LayerPatternContainer>
1193  void forwardBatch (const LayerContainer& _layers,
1194  LayerPatternContainer& layerPatternData,
1195  std::vector<double>& valuesMean,
1196  std::vector<double>& valuesStdDev,
1197  size_t trainFromLayer) const;
1198 
1199  template <typename OutputContainer>
1200  void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const;
1201 
1202  template <typename OutputContainer>
1203  void fetchOutput (const std::vector<LayerData>& layerPatternData, OutputContainer& outputContainer) const;
1204 
1205 
1206  template <typename ItWeight>
1207  std::tuple</*sumError*/double,/*sumWeights*/double> computeError (const Settings& settings,
1208  std::vector<LayerData>& lastLayerData,
1209  Batch& batch,
1210  ItWeight itWeightBegin,
1211  ItWeight itWeightEnd) const;
1212 
1213  template <typename Settings>
1214  void backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1215  const Settings& settings,
1216  size_t trainFromLayer,
1217  size_t totalNumWeights) const;
1218 
1219 
1220 
1221  /*! \brief main NN computation function
1222  *
1223  *
1224  */
1225  template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1226  double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch,
1227  ItWeight itWeightBegin, ItWeight itWeightEnd,
1228  ItGradient itGradientBegin, ItGradient itGradientEnd,
1229  size_t trainFromLayer,
1230  OutContainer& outputContainer, bool fetchOutput) const;
1231 
1232 
1233 
1234  double E ();
1235  void dE ();
1236 
1237 
1238  /*! \brief computes the error of the DNN
1239  *
1240  *
1241  */
1242  template <typename Container, typename ItWeight>
1243  double errorFunction (LayerData& layerData,
1244  Container truth,
1245  ItWeight itWeight,
1246  ItWeight itWeightEnd,
1247  double patternWeight,
1248  double factorWeightDecay,
1249  EnumRegularization eRegularization) const;
1250 
1251 
1252  const std::vector<Layer>& layers () const { return m_layers; } ///< returns the layers (structure)
1253  std::vector<Layer>& layers () { return m_layers; } ///< returns the layers (structure)
1254 
1255  void removeLayer () { m_layers.pop_back (); } ///< remove one layer
1256 
1257 
1258  void clear () ///< clear the net: remove all layers and reset the error function
1259  {
1260  m_layers.clear ();
1261  m_eErrorFunction = ModeErrorFunction::SUMOFSQUARES;
1262  }
1263 
1264 
1265  template <typename OutIterator>
1266  void initializeWeights (WeightInitializationStrategy eInitStrategy,
1267  OutIterator itWeight); ///< initialize the weights with the given strategy
1268 
1269  protected:
1270 
1271  void fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const; ///< prepare the drop-out-container (select the nodes which are to be dropped out)
1272 
1273 
1274  private:
1275 
1276  ModeErrorFunction m_eErrorFunction; ///< denotes the error function
1277  size_t m_sizeInput; ///< input size of this DNN
1278  size_t m_sizeOutput; ///< output size of this DNN
1279  std::vector<Layer> m_layers; ///< layer-structure-data
1280 
1281  protected:
1282  // variables for JsMVA (interactive training in jupyter notebook)
1283  IPythonInteractive *fInteractive = nullptr;
1284  bool * fExitFromTraining = nullptr;
1285  UInt_t *fIPyMaxIter = nullptr, *fIPyCurrentIter = nullptr;
1286 
1287  public:
1288 
1289  // setup ipython interactive variables
1290  void SetIpythonInteractive (IPythonInteractive* fI, bool* fE, UInt_t *M, UInt_t *C) {
1291  fInteractive = fI;
1292  fExitFromTraining = fE;
1293  fIPyMaxIter = M;
1294  fIPyCurrentIter = C;
1295  }
1296  };
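// Usage sketch: assembling and training a small classifier with the classes
// above (illustrative; trainPatterns/testPatterns are assumed to be prepared
// std::vector<Pattern> instances and nInput the number of input variables):
//
//     Net net;
//     net.setInputSize (nInput);
//     net.setOutputSize (1);
//     net.addLayer (Layer (20, EnumFunction::TANH));
//     net.addLayer (Layer (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID));
//     net.setErrorFunction (ModeErrorFunction::CROSSENTROPY);
//
//     std::vector<double> weights;
//     net.initializeWeights (WeightInitializationStrategy::XAVIER,
//                            std::back_inserter (weights));
//
//     Steepest minimizer (/*learningRate*/ 1e-4, /*momentum*/ 0.3, /*repetitions*/ 10);
//     Settings settings ("training"); // defaults for all remaining parameters
//     double testError = net.train (weights, trainPatterns, testPatterns, minimizer, settings);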
1297 
1298 
1299 
1300 
1301 typedef std::tuple<Settings&, Batch&, DropContainer&> pass_through_type;
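// How the pieces fit together (schematic): a minimizer treats the net as a
// fitness function and hands the pass-through tuple straight back to it;
// itPatternBegin/itPatternEnd below are hypothetical iterators into a
// std::vector<Pattern>:
//
//     Settings settings ("training");
//     Batch batch (itPatternBegin, itPatternEnd);
//     DropContainer dropContainer; // empty: no drop-out
//     pass_through_type passThrough (settings, batch, dropContainer);
//
//     std::vector<double> gradients (weights.size (), 0.0);
//     double error = net (passThrough, weights, gradients); // forward + backward pass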
1302 
1303 
1304 
1305 
1306 
1307 
1308 
1309  } // namespace DNN
1310 } // namespace TMVA
1311 
1312 
1313 // include the implementations (in header file, because they are templated)
1314 #include "NeuralNet.icc"
1315 
1316 #endif
1317 
void addPoint(std::string histoName, double x)
for monitoring
Definition: NeuralNet.h:828
void testIteration()
callback for monitoring and loggging
Definition: NeuralNet.h:937
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition: NeuralNet.h:612
virtual void startTraining()
Definition: NeuralNet.h:802
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition: NeuralNet.h:1276
const_iterator_type weightsBegin() const
returns const iterator to the begin of the weights for this layer
Definition: NeuralNet.h:609
void setInput(const_iterator_type itInputBegin, const_iterator_type itInputEnd)
change the input iterators
Definition: NeuralNet.h:569
void addLayer(Layer &&layer)
Definition: NeuralNet.h:1102
size_t convergenceCount() const
returns the current convergence count
Definition: NeuralNet.h:834
virtual ~ClassificationSettings()
d&#39;tor
Definition: NeuralNet.h:931
virtual void cycle(double progress, TString text)
Definition: NeuralNet.h:806
MinimizerType
< list all the minimizer types
Definition: NeuralNet.h:321
void add(ITERATOR itBegin, ITERATOR itEnd)
Definition: NeuralNet.h:117
double var_corr() const
Definition: NeuralNet.h:137
size_t m_sizeInput
input size of this DNN
Definition: NeuralNet.h:1277
std::vector< Layer > & layers()
returns the layers (structure)
Definition: NeuralNet.h:1253
void create(std::string histoName, int bins, double min, double max, int bins2, double min2, double max2)
for monitoring
Definition: NeuralNet.h:827
std::vector< char > DropContainer
Definition: NeuralNet.h:220
void setDropOut(Iterator begin, Iterator end, size_t _dropRepetitions)
set the drop-out configuration (layer-wise)
Definition: NeuralNet.h:766
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
stores the inverse activation function
Definition: NeuralNet.h:704
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:595
bool isFlagSet(T flag, T value)
Definition: NeuralNet.h:213
double m_maxProgress
current limits for the progress bar
Definition: NeuralNet.h:841
Steepest Gradient Descent algorithm (SGD)
Definition: NeuralNet.h:334
ModeOutputValues modeOutputValues() const
get the mode-output-value (direct, probabilities)
Definition: NeuralNet.h:689
double T(double x)
Definition: ChebyshevPol.h:34
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition: NeuralNet.h:773
std::pair< iterator_type, iterator_type > begin_end_type
Definition: NeuralNet.h:1074
double m_dropRepetitions
Definition: NeuralNet.h:856
std::vector< std::function< double(double)> > function_container_type
Definition: NeuralNet.h:445
void add(T value, double weight=1.0)
Definition: NeuralNet.h:94
const std::vector< double > & dropFractions() const
Definition: NeuralNet.h:769
Net()
c&#39;tor
Definition: NeuralNet.h:1081
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
Definition: NeuralNet.icc:565
size_t inputSize() const
input size of the DNN
Definition: NeuralNet.h:1105
void setErrorFunction(ModeErrorFunction eErrorFunction)
which error function is to be used
Definition: NeuralNet.h:1103
iterator_type valueGradientsEnd()
returns iterator to the end of the gradients of the node values
Definition: NeuralNet.h:602
std::shared_ptr< Monitoring > fMonitoring
Definition: NeuralNet.h:872
EnumRegularization regularization() const
some regularization of the DNN is turned on?
Definition: NeuralNet.h:820
Basic string class.
Definition: TString.h:137
bool useMultithreading() const
is multithreading turned on?
Definition: NeuralNet.h:822
int Int_t
Definition: RtypesCore.h:41
function_container_type::iterator function_iterator_type
Definition: NeuralNet.h:446
ModeOutputValues operator|(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:189
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition: NeuralNet.h:830
const_iterator_type gradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:608
iterator_type m_itGradientBegin
iterator to the first gradient of this layer in the gradient vector
Definition: NeuralNet.h:653
STL namespace.
bool m_hasGradients
does this layer have gradients (only if in training mode)
Definition: NeuralNet.h:660
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
Definition: NeuralNet.icc:176
ModeOutputValues operator &(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:200
ModeOutputValues m_eModeOutput
stores the output mode (DIRECT, SIGMOID, SOFTMAX)
Definition: NeuralNet.h:662
size_t numWeights(size_t numInputNodes) const
return the number of weights for this layer (fully connected)
Definition: NeuralNet.h:693
void setOutputSize(size_t sizeOutput)
set the output size of the DNN
Definition: NeuralNet.h:1100
virtual void testIteration()
callback for monitoring and loggging
Definition: NeuralNet.h:813
bool m_isInputLayer
is this layer an input layer
Definition: NeuralNet.h:658
double momentum() const
get the momentum (e.g. for SGD)
Definition: NeuralNet.h:779
EnumFunction m_activationFunctionType
Definition: NeuralNet.h:710
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
iterator_type valuesEnd()
returns iterator to the end of the (node) values
Definition: NeuralNet.h:590
neural net
Definition: NeuralNet.h:1068
std::shared_ptr< std::function< double(double)> > activationFunction() const
fetch the activation function for this layer
Definition: NeuralNet.h:695
double learningRate() const
get the learning rate
Definition: NeuralNet.h:778
size_t m_sizeOutput
outut size of this DNN
Definition: NeuralNet.h:1278
size_t m_numNodes
Definition: NeuralNet.h:707
std::vector< double > m_valueGradients
stores the gradients of the values (nodes)
Definition: NeuralNet.h:647
std::vector< double > m_significances
Definition: NeuralNet.h:1008
virtual void startTestCycle()
callback for monitoring and loggging
Definition: NeuralNet.h:811
size_t m_convergenceSteps
number of steps without improvement to consider the DNN to have converged
Definition: NeuralNet.h:844
double sqrt(double)
container_type::const_iterator const_iterator_type
Definition: NeuralNet.h:443
MinimizerType fMinimizerType
Definition: NeuralNet.h:862
Double_t x[n]
Definition: legend1.C:17
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:491
std::vector< double > m_dropOut
Definition: NeuralNet.h:857
const_iterator_type valueGradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:604
std::vector< Layer > m_layers
layer-structure-data
Definition: NeuralNet.h:1279
ModeOutputValues outputMode() const
returns the output mode
Definition: NeuralNet.h:592
void SetIpythonInteractive(IPythonInteractive *fI, bool *fE, UInt_t *M, UInt_t *C)
Definition: NeuralNet.h:1290
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition: NeuralNet.h:611
size_t m_batchSize
mini-batch size
Definition: NeuralNet.h:845
const_iterator_type m_itConstWeightBegin
const iterator to the first weight of this layer in the weight vector
Definition: NeuralNet.h:652
function_container_type::const_iterator const_function_iterator_type
Definition: NeuralNet.h:447
ModeOutputValues m_eModeOutputValues
do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned...
Definition: NeuralNet.h:709
void removeLayer()
remove one layer
Definition: NeuralNet.h:1255
bool hasDropOut() const
has this layer drop-out turned on?
Definition: NeuralNet.h:626
void setInputSize(size_t sizeInput)
set the input size of the DNN
Definition: NeuralNet.h:1099
const_iterator_type m_itInputBegin
iterator to the first of the nodes in the input node vector
Definition: NeuralNet.h:643
bool m_hasDropOut
dropOut is turned on?
Definition: NeuralNet.h:650
ClassificationSettings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, EnumRegularization _regularization=EnumRegularization::NONE, size_t _scaleToNumEvents=0, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _useMultithreading=true)
c&#39;tor
Definition: NeuralNet.h:908
const_iterator_type valueGradientsEnd() const
returns const iterator to the end of the gradients
Definition: NeuralNet.h:605
const_iterator end() const
Definition: NeuralNet.h:246
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition: NeuralNet.h:374
const Double_t sigma
double stdDev_corr() const
Definition: NeuralNet.h:145
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition: NeuralNet.h:826
double var() const
Definition: NeuralNet.h:128
EnumFunction activationFunctionType() const
get the activation function type for this layer
Definition: NeuralNet.h:698
std::vector< double > m_output
Definition: NeuralNet.h:1003
EColor
Definition: Rtypes.h:60
size_t testRepetitions() const
how often is the test data tested
Definition: NeuralNet.h:775
iterator_type valuesBegin()
returns iterator to the begin of the (node) values
Definition: NeuralNet.h:589
double stdDev() const
Definition: NeuralNet.h:146
void setMonitoring(std::shared_ptr< Monitoring > ptrMonitoring)
prepared for monitoring
Definition: NeuralNet.h:771
std::vector< Pattern > * m_pResultPatternContainer
Definition: NeuralNet.h:1016
std::vector< double > m_deltas
stores the deltas for the DNN training
Definition: NeuralNet.h:646
double m_minProgress
current limits for the progress bar
Definition: NeuralNet.h:840
iterator_type gradientsBegin()
returns iterator to the begin of the gradients
Definition: NeuralNet.h:607
double weights() const
Definition: NeuralNet.h:126
size_t dropRepetitions() const
Definition: NeuralNet.h:768
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition: NeuralNet.h:593
static double C[]
size_t m_testRepetitions
Definition: NeuralNet.h:846
container_type::iterator iterator_type
Definition: NeuralNet.h:442
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition: NeuralNet.h:1252
std::vector< Pattern >::const_iterator const_iterator
Definition: NeuralNet.h:238
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition: NeuralNet.h:795
double factorWeightDecay() const
get the weight-decay factor
Definition: NeuralNet.h:776
RooCmdArg Minimizer(const char *type, const char *alg=0)
ModeOutputValues operator &=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:205
LayerData(LayerData &&other)
move c&#39;tor of LayerData
Definition: NeuralNet.h:542
size_t m_convergenceCount
Definition: NeuralNet.h:864
std::vector< double > container_type
Definition: NeuralNet.h:440
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max cross-entropy error function (for mutually exclusive classes)
Definition: NeuralNet.icc:451
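For orientation, a self-contained sketch of what soft-max cross-entropy with its deltas computes (standard formulas: E = -sum_k t_k ln p_k, delta_k = p_k - t_k); this illustrates the math only and is not the iterator-based template above:
    #include <algorithm>
    #include <cmath>
    #include <vector>
    // illustrative only: soft-max cross entropy over already-normalized probabilities
    double softMaxCrossEntropySketch(const std::vector<double>& prob,
                                     const std::vector<double>& truth,
                                     std::vector<double>& deltas,
                                     double patternWeight)
    {
        double error = 0.0;
        for (std::size_t i = 0; i < prob.size(); ++i)
        {
            error += -truth[i] * std::log(std::max(prob[i], 1e-15)); // clip to avoid log(0)
            deltas[i] = (prob[i] - truth[i]) * patternWeight;        // soft-max + CE delta
        }
        return error * patternWeight;
    }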
Timer m_timer
timer for monitoring
Definition: NeuralNet.h:839
size_t minError() const
returns the smallest error so far
Definition: NeuralNet.h:836
size_t numNodes() const
return the number of nodes of this layer
Definition: NeuralNet.h:692
unsigned int UInt_t
Definition: RtypesCore.h:42
double m_factorWeightDecay
Definition: NeuralNet.h:847
Double_t E()
Definition: TMath.h:54
LayerData(const LayerData &other)
copy c'tor of LayerData
Definition: NeuralNet.h:519
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:213
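For reference, the L1 and L2 functionals evaluate sum |w| and sum w^2 over the weights; a standalone sketch over a flat weight vector (illustrative only, not the Architecture_t-templated API above):
    #include <cmath>
    #include <numeric>
    #include <vector>
    // illustrative only: L1 = sum |w|, L2 = sum w^2 over all weights
    double l1Sketch(const std::vector<double>& w)
    {
        return std::accumulate(w.begin(), w.end(), 0.0,
                               [](double s, double v) { return s + std::fabs(v); });
    }
    double l2Sketch(const std::vector<double>& w)
    {
        return std::accumulate(w.begin(), w.end(), 0.0,
                               [](double s, double v) { return s + v * v; });
    }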
void modeOutputValues(ModeOutputValues eModeOutputValues)
set the mode-output-value
Definition: NeuralNet.h:690
void pads(int numPads)
preparation for monitoring
Definition: NeuralNet.h:825
Settings for the training of the neural net.
Definition: NeuralNet.h:736
virtual void startTrainCycle()
Definition: NeuralNet.h:789
virtual void drawSample(const std::vector< double > &, const std::vector< double > &, const std::vector< double > &, double)
callback for monitoring and logging
Definition: NeuralNet.h:814
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
fetch the inverse activation function for this layer
Definition: NeuralNet.h:696
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition: NeuralNet.h:1056
ModeErrorFunction
error functions to be chosen from
Definition: NeuralNet.h:1045
std::vector< double > m_localGradients
local gradients for reuse in a thread.
Definition: NeuralNet.h:377
Layer defines the layout of a layer.
Definition: NeuralNet.h:676
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
const_dropout_iterator m_itDropOut
iterator to a container indicating if the corresponding node is to be dropped
Definition: NeuralNet.h:649
Steepest(double learningRate=1e-4, double momentum=0.5, size_t repetitions=10)
c'tor
Definition: NeuralNet.h:349
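Constructing the steepest-gradient-descent minimizer with its documented defaults (a sketch; the include path is an assumption):
    #include "TMVA/NeuralNet.h"
    TMVA::DNN::Steepest minimizer(/*learningRate=*/ 1e-4,
                                  /*momentum=*/ 0.5,
                                  /*repetitions=*/ 10);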
bool m_hasWeights
does this layer have weights (it does not if it is the input layer)
Definition: NeuralNet.h:659
std::vector< double > m_ams
Definition: NeuralNet.h:1007
EnumRegularization m_regularization
Definition: NeuralNet.h:854
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition: NeuralNet.h:587
void clearDropOut()
clear the drop-out-data for this layer
Definition: NeuralNet.h:624
ModeOutputValues
Definition: NeuralNet.h:179
double gaussDouble(double mean, double sigma)
Definition: NeuralNet.cxx:13
MinimizerType minimizerType() const
which minimizer shall be used (e.g. SGD)
Definition: NeuralNet.h:781
The Batch class encapsulates one mini-batch.
Definition: NeuralNet.h:235
TText * text
double m_beta
internal parameter (momentum)
Definition: NeuralNet.h:373
int type
Definition: TGX11.cxx:120
double mean() const
Definition: NeuralNet.h:127
container_type::iterator iterator_type
Definition: NeuralNet.h:1073
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition: NeuralNet.h:835
Double_t y[n]
Definition: legend1.C:17
std::vector< double > m_targets
Definition: NeuralNet.h:1004
Double_t e
Definition: TRolke.cxx:630
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
Definition: NeuralNet.icc:539
double uniformDouble(double minValue, double maxValue)
Definition: NeuralNet.cxx:21
std::vector< double > m_values
stores the values of the nodes in this layer
Definition: NeuralNet.h:648
std::vector< double > m_weights
Definition: NeuralNet.h:1005
const_iterator_type deltasEnd() const
returns const iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:599
size_t m_maxConvergenceCount
Definition: NeuralNet.h:865
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:596
void addLayer(Layer &layer)
add a layer (layout)
Definition: NeuralNet.h:1101
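A sketch of assembling a layout via setInputSize and addLayer; the default Net constructor and the Layer constructor arguments (node count plus activation function) are assumptions not shown in this index:
    #include "TMVA/NeuralNet.h"
    TMVA::DNN::Net buildNet()
    {
        TMVA::DNN::Net net;                                        // default c'tor assumed
        net.setInputSize(4);                                       // four input nodes
        TMVA::DNN::Layer hidden(8, TMVA::DNN::EnumFunction::TANH); // assumed c'tor signature
        net.addLayer(hidden);                                      // add a layer (layout)
        return net;
    }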
This class is needed by JsMVA, and it's a helper class for tracking errors during the training in Jup...
Definition: MethodBase.h:102
Abstract ClassifierFactory template that handles arbitrary types.
size_t size() const
return the size of the layer
Definition: NeuralNet.h:629
const_iterator_type m_itInputEnd
iterator to the end of the nodes in the input node vector
Definition: NeuralNet.h:644
std::shared_ptr< std::function< double(double)> > m_activationFunction
activation function for this layer
Definition: NeuralNet.h:655
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
inverse activation function for this layer
Definition: NeuralNet.h:656
virtual void setProgressLimits(double minProgress=0, double maxProgress=100)
Definition: NeuralNet.h:797
size_t size() const
Definition: NeuralNet.h:248
size_t batchSize() const
mini-batch size
Definition: NeuralNet.h:774
virtual void endTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:812
#define NULL
Definition: Rtypes.h:82
std::vector< double > m_localWeights
local weights for reuse in a thread.
Definition: NeuralNet.h:376
iterator_type valueGradientsBegin()
returns iterator to the beginning of the gradients of the node values
Definition: NeuralNet.h:601
const_iterator m_itBegin
iterator denoting the beginning of the batch
Definition: NeuralNet.h:251
std::vector< double > container_type
Definition: NeuralNet.h:1072
void clear()
clear the values and the deltas
Definition: NeuralNet.h:580
Settings for classification; used to distinguish between different function signatures.
Definition: NeuralNet.h:901
const_dropout_iterator dropOut() const
returns the beginning of the drop-out information
Definition: NeuralNet.h:627
void clear(std::string histoName)
for monitoring
Definition: NeuralNet.h:831
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and logging
Definition: NeuralNet.h:816
const_iterator_type deltasBegin() const
returns const iterator to the beginning of the deltas (back-propagation)
Definition: NeuralNet.h:598
const_iterator begin() const
Definition: NeuralNet.h:245
Batch(typename std::vector< Pattern >::const_iterator itBegin, typename std::vector< Pattern >::const_iterator itEnd)
Definition: NeuralNet.h:240
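A Batch is just a pair of iterators into a pattern container; a minimal sketch (assuming Pattern is reachable through the TMVA::DNN namespace):
    #include <vector>
    #include "TMVA/NeuralNet.h"
    using namespace TMVA::DNN;
    void iterateBatch(std::vector<Pattern>& patterns)
    {
        Batch batch(patterns.begin(), patterns.end()); // one mini-batch over all patterns
        for (auto it = batch.begin(); it != batch.end(); ++it)
        {
            const Pattern& p = *it;                    // one training pattern
            (void)p;
        }
    }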
DropContainer::const_iterator const_dropout_iterator
Definition: NeuralNet.h:449
double m_alpha
internal parameter (learningRate)
Definition: NeuralNet.h:372
EnumRegularization
Definition: NeuralNet.h:173
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition: NeuralNet.h:586
void setDropOut(Iterator itDrop)
set the drop-out info for this layer
Definition: NeuralNet.h:618
void addPoint(std::string histoName, double x, double y)
for monitoring
Definition: NeuralNet.h:829
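The monitoring calls (pads, create, exists, addPoint) compose in the obvious way; a sketch assuming they are callable on a Settings instance and using a hypothetical histogram name:
    #include "TMVA/NeuralNet.h"
    // sketch: membership of these methods in Settings is an assumption
    void bookAndFill(TMVA::DNN::Settings& settings)
    {
        settings.pads(4);                               // prepare 4 pads
        settings.create("trainError", 100, 0.0, 1.0);   // book histogram (name hypothetical)
        if (settings.exists("trainError"))
            settings.addPoint("trainError", 1.0, 0.42); // add an (x, y) point
    }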
size_t outputSize() const
output size of the DNN
Definition: NeuralNet.h:1106
ModeOutputValues operator|=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:194
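ModeOutputValues is used as a bit-flag enum, which is why operator|= and operator&= are overloaded; a sketch (the enumerator names SIGMOID and DIRECT are assumptions, since the enumerators are not listed in this index):
    #include "TMVA/NeuralNet.h"
    TMVA::DNN::ModeOutputValues combinedMode()
    {
        TMVA::DNN::ModeOutputValues mode = TMVA::DNN::ModeOutputValues::SIGMOID; // assumed enumerator
        mode |= TMVA::DNN::ModeOutputValues::DIRECT;                             // assumed enumerator
        return mode;
    }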
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:405
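For reference, the usual per-output cross-entropy has the form E = -sum_k [ t_k ln p_k + (1 - t_k) ln(1 - p_k) ], scaled by patternWeight; this is the standard formula, with the exact conventions in NeuralNet.icc:405. The soft-max variant above handles mutually exclusive classes instead.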
bool exists(std::string histoName)
for monitoring
Definition: NeuralNet.h:832
TRandom3 R
Definition: testIO.cxx:28
static double Q[]
LayerData holds the data of one layer.
Definition: NeuralNet.h:437
char name[80]
Definition: TGX11.cxx:109
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition: NeuralNet.h:788
Net(const Net &other)
copy c'tor
Definition: NeuralNet.h:1092
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition: NeuralNet.h:1301
int repetitions() const
how many steps are taken before the batch is changed
Definition: NeuralNet.h:780
const_iterator m_itEnd
iterator denoting the end of the batch
Definition: NeuralNet.h:252
int randomInt(int maxValue)
Definition: NeuralNet.cxx:30
std::shared_ptr< std::function< double(double)> > m_activationFunction
stores the activation function
Definition: NeuralNet.h:703
std::vector< double > m_input
Definition: NeuralNet.h:1002