NeuralNet.h
/**
 * @file NeuralNet.h
 * @author Peter Speckmayer
 * @version 1.0
 *
 * @section LICENSE
 *
 *
 * @section Neural net implementation
 *
 * An implementation of a neural net for TMVA. This neural net uses multithreading.
 *
 */


//////////////////////////////////////////////////////////////////////////
//                                                                      //
// NeuralNet                                                            //
//                                                                      //
// A neural net implementation                                          //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#ifndef TMVA_NEURAL_NET
#define TMVA_NEURAL_NET
#pragma once

#include <vector>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <iterator>
#include <functional>
#include <tuple>
#include <cmath>
#include <cassert>
#include <random>
#include <thread>
#include <future>
#include <type_traits>

#include "Pattern.h"
#include "Monitoring.h"

#include "TApplication.h"
#include "Timer.h"

#include "TH1F.h"
#include "TH2F.h"

#include <fenv.h> // turn on or off exceptions for NaN and other numeric exceptions


namespace TMVA
{

   class IPythonInteractive;

   namespace DNN
   {

      // double gaussDouble (double mean, double sigma);



      double gaussDouble (double mean, double sigma);
      double uniformDouble (double minValue, double maxValue);
      int randomInt (int maxValue);



      class MeanVariance
      {
      public:
         MeanVariance ()
            : m_n (0)
            , m_sumWeights (0)
            , m_mean (0)
            , m_squared (0)
         {}

         inline void clear ()
         {
            m_n = 0;
            m_sumWeights = 0;
            m_mean = 0;
            m_squared = 0;
         }

         template <typename T>
         inline void add (T value, double weight = 1.0)
         {
            ++m_n; // a value has been added

            if (m_n == 1) // initialization
            {
               m_mean = value;
               m_squared = 0.0;
               m_sumWeights = weight;
               return;
            }

            double tmpWeight = m_sumWeights + weight;
            double Q = value - m_mean;

            double R = Q * weight / tmpWeight;
            m_mean += R;
            m_squared += m_sumWeights * Q * R;

            m_sumWeights = tmpWeight;
         }

         template <typename ITERATOR>
         inline void add (ITERATOR itBegin, ITERATOR itEnd)
         {
            for (ITERATOR it = itBegin; it != itEnd; ++it)
               add (*it);
         }



         inline int count () const { return m_n; }
         inline double weights () const { if (m_n == 0) return 0; return m_sumWeights; }
         inline double mean () const { if (m_n == 0) return 0; return m_mean; }
         inline double var () const
         {
            if (m_n == 0)
               return 0;
            if (m_squared <= 0)
               return 0;
            return (m_squared / m_sumWeights);
         }

         inline double var_corr () const
         {
            if (m_n <= 1)
               return var ();

            return (var () * m_n / (m_n - 1)); // unbiased for small sample sizes
         }

         inline double stdDev_corr () const { return sqrt (var_corr ()); }
         inline double stdDev () const { return sqrt (var ()); } // biased for small sample sizes

      private:
         size_t m_n;
         double m_sumWeights;
         double m_mean;
         double m_squared;
      };
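
      // Illustrative sketch: the accumulator above implements a weighted,
      // Welford-style running mean/variance update, so the individual values
      // never need to be stored. (The class name MeanVariance follows the
      // reconstruction of this listing; the usage below is an example, not
      // code from the original file.)
      //
      //    MeanVariance stat;
      //    stat.add (1.0);
      //    stat.add (3.0, /*weight=*/ 2.0);
      //    double m = stat.mean ();         // weighted mean: (1*1 + 2*3)/3
      //    double s = stat.stdDev_corr ();  // bias-corrected standard deviation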



      enum class EnumFunction
      {
         ZERO = '0',
         LINEAR = 'L',
         TANH = 'T',
         RELU = 'R',
         SYMMRELU = 'r',
         TANHSHIFT = 't',
         SIGMOID = 's',
         SOFTSIGN = 'S',
         GAUSS = 'G',
         GAUSSCOMPLEMENT = 'C'
      };



      enum class EnumRegularization
      {
         NONE, L1, L2, L1MAX
      };


      enum class ModeOutputValues : int
      {
         DIRECT = 0x01,
         SIGMOID = 0x02,
         SOFTMAX = 0x04,
         BATCHNORMALIZATION = 0x08
      };



      inline ModeOutputValues operator| (ModeOutputValues lhs, ModeOutputValues rhs)
      {
         return ModeOutputValues ((int)lhs | (int)rhs);
      }

      inline ModeOutputValues operator|= (ModeOutputValues& lhs, ModeOutputValues rhs)
      {
         lhs = ModeOutputValues ((int)lhs | (int)rhs);
         return lhs;
      }

      inline ModeOutputValues operator& (ModeOutputValues lhs, ModeOutputValues rhs)
      {
         return ModeOutputValues ((int)lhs & (int)rhs);
      }

      inline ModeOutputValues operator&= (ModeOutputValues& lhs, ModeOutputValues rhs)
      {
         lhs = ModeOutputValues ((int)lhs & (int)rhs);
         return lhs;
      }


      template <typename T>
      bool isFlagSet (T flag, T value)
      {
         return (int)(value & flag) != 0;
      }
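
      // Illustrative sketch: combining output-mode flags with the bit
      // operators above and querying them with isFlagSet:
      //
      //    ModeOutputValues mode = ModeOutputValues::SIGMOID | ModeOutputValues::BATCHNORMALIZATION;
      //    bool useSigmoid = isFlagSet (ModeOutputValues::SIGMOID, mode); // true
      //    bool useSoftmax = isFlagSet (ModeOutputValues::SOFTMAX, mode); // false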



      class Net;




      typedef std::vector<char> DropContainer;


      /*! \brief The Batch class encapsulates one mini-batch
       *
       * Holds a const_iterator to the beginning and to the end of one batch in a vector of Pattern
       */
      class Batch
      {
      public:
         typedef typename std::vector<Pattern>::const_iterator const_iterator;

         Batch (typename std::vector<Pattern>::const_iterator itBegin, typename std::vector<Pattern>::const_iterator itEnd)
            : m_itBegin (itBegin)
            , m_itEnd (itEnd)
         {}

         const_iterator begin () const { return m_itBegin; }
         const_iterator end () const { return m_itEnd; }

         size_t size () const { return std::distance (begin (), end ()); }

      private:
         const_iterator m_itBegin; ///< iterator denoting the beginning of the batch
         const_iterator m_itEnd;   ///< iterator denoting the end of the batch
      };
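
      // Illustrative sketch: slicing a pattern container into mini-batches
      // ("patterns" and "batchSize" are hypothetical names, not from this header):
      //
      //    std::vector<Pattern> patterns = /* ... */;
      //    const size_t batchSize = 10;
      //    auto it = patterns.cbegin ();
      //    while (it != patterns.cend ())
      //    {
      //       auto itEnd = (size_t)std::distance (it, patterns.cend ()) > batchSize ? it + batchSize : patterns.cend ();
      //       Batch batch (it, itEnd);
      //       // ... compute/train on "batch" ...
      //       it = itEnd;
      //    }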



      template <typename ItSource, typename ItWeight, typename ItTarget>
      void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd);


      template <typename ItSource, typename ItWeight, typename ItPrev>
      void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd);



      template <typename ItValue, typename ItFunction>
      void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction);


      template <typename ItValue, typename ItFunction, typename ItInverseFunction, typename ItGradient>
      void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, ItGradient itGradient);



      template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
      void update (ItSource itSource, ItSource itSourceEnd,
                   ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
                   ItTargetGradient itTargetGradientBegin,
                   ItGradient itGradient);



      template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
      void update (ItSource itSource, ItSource itSourceEnd,
                   ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
                   ItTargetGradient itTargetGradientBegin,
                   ItGradient itGradient,
                   ItWeight itWeight, double weightDecay);



      // ----- signature of a minimizer -------------
      // class Minimizer
      // {
      // public:
      //
      //     template <typename Function, typename Variables, typename PassThrough>
      //     double operator() (Function& fnc, Variables& vars, PassThrough& passThrough)
      //     {
      //         // auto itVars = begin (vars);
      //         // auto itVarsEnd = end (vars);
      //
      //         std::vector<double> myweights;
      //         std::vector<double> gradients;
      //
      //         double value = fnc (passThrough, myweights);
      //         value = fnc (passThrough, myweights, gradients);
      //         return value;
      //     }
      // };



      /// list of all the minimizer types
      enum MinimizerType
      {
         fSteepest ///< SGD
      };




      /*! \brief Steepest Gradient Descent algorithm (SGD)
       *
       * Implements a steepest gradient descent minimization algorithm
       */
      class Steepest
      {
      public:

         size_t m_repetitions;


         /*! \brief c'tor
          *
          * C'tor
          *
          * \param learningRate denotes the learning rate for the SGD algorithm
          * \param momentum fraction of the velocity which is taken over from the last step
          * \param repetitions re-compute the gradients each "repetitions" steps
          */
         Steepest (double learningRate = 1e-4,
                   double momentum = 0.5,
                   size_t repetitions = 10)
            : m_repetitions (repetitions)
            , m_alpha (learningRate)
            , m_beta (momentum)
         {}

         /*! \brief operator to call the steepest gradient descent algorithm
          *
          * entry point to start the minimization procedure
          *
          * \param fitnessFunction (templated) function which has to be provided. This function is minimized
          * \param weights (templated) a reference to a container of weights. The result of the minimization procedure
          *                is returned via this reference (needs to support std::begin and std::end)
          * \param passThrough (templated) object which can hold any data which the fitness function needs. This object
          *                    is not touched by the minimizer; this object is provided to the fitness function when
          *                    called
          */
         template <typename Function, typename Weights, typename PassThrough>
         double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough);


         double m_alpha; ///< internal parameter (learningRate)
         double m_beta;  ///< internal parameter (momentum)
         std::vector<double> m_prevGradients; ///< vector remembers the gradients of the previous step

         std::vector<double> m_localWeights;   ///< local weights for reuse in thread.
         std::vector<double> m_localGradients; ///< local gradients for reuse in thread.
      };
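
      // Illustrative sketch: configuring the SGD minimizer declared above
      // (the numbers are just the documented defaults spelled out):
      //
      //    Steepest minimizer (/*learningRate=*/ 1e-4,
      //                        /*momentum=*/     0.5,
      //                        /*repetitions=*/  10);
      //
      //    // minimizer (fitnessFunction, weights, passThrough) then runs the
      //    // minimization and returns the final value of fitnessFunction;
      //    // the optimized weights are written back into "weights".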




      template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
      double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);



      template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
      double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);



      template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
      double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);



      template <typename ItWeight>
      double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization);
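
      // Background sketch (standard definitions, not copied from this header;
      // the exact normalization used here lives in NeuralNet.icc): for outputs
      // y_i, truth values t_i and a pattern weight w, the error functions above
      // typically compute
      //
      //    sumOfSquares:  E = w * 1/2 * sum_i (y_i - t_i)^2
      //    crossEntropy:  E = -w * sum_i [ t_i*ln(y_i) + (1-t_i)*ln(1-y_i) ]
      //
      // while weightDecay adds a regularization term to the error,
      //
      //    L1:  E' = E + factorWeightDecay * sum_j |w_j|
      //    L2:  E' = E + factorWeightDecay * 1/2 * sum_j w_j^2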




      /*! \brief LayerData holds the data of one layer
       *
       * LayerData holds the data of one layer, but not its layout
       *
       *
       */
      class LayerData
      {
      public:
         typedef std::vector<double> container_type;

         typedef container_type::iterator iterator_type;
         typedef container_type::const_iterator const_iterator_type;

         typedef std::vector<std::function<double(double)> > function_container_type;
         typedef function_container_type::iterator function_iterator_type;
         typedef function_container_type::const_iterator const_function_iterator_type;

         typedef DropContainer::const_iterator const_dropout_iterator;

         /*! \brief c'tor of LayerData
          *
          * C'tor of LayerData for the input layer
          *
          * \param itInputBegin iterator to the begin of a vector which holds the values of the nodes of the neural net
          * \param itInputEnd iterator to the end of a vector which holds the values of the nodes of the neural net
          * \param eModeOutput indicates a potential transformation of the output values before further computation;
          *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
          *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
          *                    output values (mutually exclusive probability)
          */
         LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);


         /*! \brief c'tor of LayerData
          *
          * C'tor of LayerData for the input layer
          *
          * \param inputSize input size of this layer
          */
         LayerData (size_t inputSize);

         ~LayerData () {}

         /*! \brief c'tor of LayerData
          *
          * C'tor of LayerData for all layers which are not the input layer; used during the training of the DNN
          *
          * \param size size of the layer
          * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
          * \param itGradientBegin indicates the start of the gradients for this layer on the gradient vector
          * \param activationFunction activation function for this layer
          * \param inverseActivationFunction inverse activation function for this layer
          * \param eModeOutput indicates a potential transformation of the output values before further computation;
          *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
          *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
          *                    output values (mutually exclusive probability)
          */
         LayerData (size_t size,
                    const_iterator_type itWeightBegin,
                    iterator_type itGradientBegin,
                    std::shared_ptr<std::function<double(double)>> activationFunction,
                    std::shared_ptr<std::function<double(double)>> inverseActivationFunction,
                    ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);

         /*! \brief c'tor of LayerData
          *
          * C'tor of LayerData for all layers which are not the input layer; used during the application of the DNN
          *
          * \param size size of the layer
          * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
          * \param activationFunction activation function for this layer
          * \param eModeOutput indicates a potential transformation of the output values before further computation;
          *                    DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
          *                    output value (to create a probability); SOFTMAX applies a softmax transformation to all
          *                    output values (mutually exclusive probability)
          */
         LayerData (size_t size, const_iterator_type itWeightBegin,
                    std::shared_ptr<std::function<double(double)>> activationFunction,
                    ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);

         /*! \brief copy c'tor of LayerData
          *
          *
          */
         LayerData (const LayerData& other)
            : m_size (other.m_size)
            , m_itInputBegin (other.m_itInputBegin)
            , m_itInputEnd (other.m_itInputEnd)
            , m_deltas (other.m_deltas)
            , m_valueGradients (other.m_valueGradients)
            , m_values (other.m_values)
            , m_itDropOut (other.m_itDropOut)
            , m_hasDropOut (other.m_hasDropOut)
            , m_itConstWeightBegin (other.m_itConstWeightBegin)
            , m_itGradientBegin (other.m_itGradientBegin)
            , m_activationFunction (other.m_activationFunction)
            , m_inverseActivationFunction (other.m_inverseActivationFunction)
            , m_isInputLayer (other.m_isInputLayer)
            , m_hasWeights (other.m_hasWeights)
            , m_hasGradients (other.m_hasGradients)
            , m_eModeOutput (other.m_eModeOutput)
         {}

         /*! \brief move c'tor of LayerData
          *
          *
          */
         LayerData (LayerData&& other)
            : m_size (other.m_size)
            , m_itInputBegin (other.m_itInputBegin)
            , m_itInputEnd (other.m_itInputEnd)
            , m_deltas (std::move (other.m_deltas))
            , m_valueGradients (std::move (other.m_valueGradients))
            , m_values (std::move (other.m_values))
            , m_itDropOut (other.m_itDropOut)
            , m_hasDropOut (other.m_hasDropOut)
            , m_itConstWeightBegin (other.m_itConstWeightBegin)
            , m_itGradientBegin (other.m_itGradientBegin)
            , m_activationFunction (std::move (other.m_activationFunction))
            , m_inverseActivationFunction (std::move (other.m_inverseActivationFunction))
            , m_isInputLayer (other.m_isInputLayer)
            , m_hasWeights (other.m_hasWeights)
            , m_hasGradients (other.m_hasGradients)
            , m_eModeOutput (other.m_eModeOutput)
         {}


         /*! \brief change the input iterators
          *
          *
          * \param itInputBegin indicates the start of the input node vector
          * \param itInputEnd indicates the end of the input node vector
          *
          */
         void setInput (const_iterator_type itInputBegin, const_iterator_type itInputEnd)
         {
            m_isInputLayer = true;
            m_itInputBegin = itInputBegin;
            m_itInputEnd = itInputEnd;
         }

         /*! \brief clear the values and the deltas
          *
          *
          */
         void clear ()
         {
            m_values.assign (m_values.size (), 0.0);
            m_deltas.assign (m_deltas.size (), 0.0);
         }

         const_iterator_type valuesBegin () const { return m_isInputLayer ? m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values
         const_iterator_type valuesEnd () const { return m_isInputLayer ? m_itInputEnd : end (m_values); } ///< returns const iterator to the end of the (node) values

         iterator_type valuesBegin () { assert (!m_isInputLayer); return begin (m_values); } ///< returns iterator to the begin of the (node) values
         iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); } ///< returns iterator to the end of the (node) values

         ModeOutputValues outputMode () const { return m_eModeOutput; } ///< returns the output mode
         container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them

         iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation)
         iterator_type deltasEnd () { return end (m_deltas); } ///< returns iterator to the end of the deltas (back-propagation)

         const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation)
         const_iterator_type deltasEnd () const { return end (m_deltas); } ///< returns const iterator to the end of the deltas (back-propagation)

         iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values
         iterator_type valueGradientsEnd () { return end (m_valueGradients); } ///< returns iterator to the end of the gradients of the node values

         const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients
         const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); } ///< returns const iterator to the end of the gradients

         iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients
         const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients
         const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer

         std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }
         std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; }

         /*! \brief set the drop-out info for this layer
          *
          */
         template <typename Iterator>
         void setDropOut (Iterator itDrop) { m_itDropOut = itDrop; m_hasDropOut = true; }

         /*! \brief clear the drop-out data for this layer
          *
          *
          */
         void clearDropOut () { m_hasDropOut = false; }

         bool hasDropOut () const { return m_hasDropOut; } ///< has this layer drop-out turned on?
         const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; } ///< return the begin of the drop-out information

         size_t size () const { return m_size; } ///< return the size of the layer

      private:

         /*! \brief compute the probabilities from the node values
          *
          *
          */
         container_type computeProbabilities () const;

      private:

         size_t m_size; ///< layer size

         const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector
         const_iterator_type m_itInputEnd;   ///< iterator to the end of the nodes in the input node vector

         std::vector<double> m_deltas;         ///< stores the deltas for the DNN training
         std::vector<double> m_valueGradients; ///< stores the gradients of the values (nodes)
         std::vector<double> m_values;         ///< stores the values of the nodes in this layer
         const_dropout_iterator m_itDropOut;   ///< iterator to a container indicating if the corresponding node is to be dropped
         bool m_hasDropOut;                    ///< is drop-out turned on?

         const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector
         iterator_type m_itGradientBegin;          ///< iterator to the first gradient of this layer in the gradient vector

         std::shared_ptr<std::function<double(double)>> m_activationFunction;        ///< activation function for this layer
         std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< inverse activation function for this layer

         bool m_isInputLayer;  ///< is this layer an input layer
         bool m_hasWeights;    ///< does this layer have weights (it does not if it is the input layer)
         bool m_hasGradients;  ///< does this layer have gradients (only if in training mode)

         ModeOutputValues m_eModeOutput; ///< stores the output mode (DIRECT, SIGMOID, SOFTMAX)

      };




      /*! \brief Layer defines the layout of a layer
       *
       * Layer defines the layout of a specific layer in the DNN
       * Objects of this class don't hold the layer data itself (see class "LayerData")
       *
       */
      class Layer
      {
      public:

         /*! \brief c'tor for defining a Layer
          *
          *
          * \param numNodes number of nodes of this layer
          * \param activationFunction activation function type of this layer
          * \param eModeOutputValues mode of the output values of this layer
          *
          */
         Layer (size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues = ModeOutputValues::DIRECT);

         ModeOutputValues modeOutputValues () const { return m_eModeOutputValues; } ///< get the mode-output-value (direct, probabilities)
         void modeOutputValues (ModeOutputValues eModeOutputValues) { m_eModeOutputValues = eModeOutputValues; } ///< set the mode-output-value

         size_t numNodes () const { return m_numNodes; } ///< return the number of nodes of this layer
         size_t numWeights (size_t numInputNodes) const { return numInputNodes * numNodes (); } ///< return the number of weights for this layer (fully connected)

         std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; } ///< fetch the activation function for this layer
         std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; } ///< fetch the inverse activation function for this layer

         EnumFunction activationFunctionType () const { return m_activationFunctionType; } ///< get the activation function type for this layer

      private:

         std::shared_ptr<std::function<double(double)>> m_activationFunction;        ///< stores the activation function
         std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< stores the inverse activation function

         size_t m_numNodes;

         ModeOutputValues m_eModeOutputValues; ///< do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned as such
         EnumFunction m_activationFunctionType;

         friend class Net;
      };
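
      // Illustrative sketch: defining a layer layout with the Layer class
      // above (a tanh hidden layer and a sigmoid-transformed output layer):
      //
      //    Layer hidden (20, EnumFunction::TANH);
      //    Layer output (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID);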




      template <typename LAYERDATA>
      void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData);


      template <typename LAYERDATA>
      void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData);


      template <typename LAYERDATA>
      void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization);



      /*! \brief Settings for the training of the neural net
       *
       *
       */
      class Settings
      {
      public:

         /*! \brief c'tor
          *
          *
          */
         Settings (TString name,
                   size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
                   double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE,
                   MinimizerType _eMinimizerType = MinimizerType::fSteepest,
                   double _learningRate = 1e-5, double _momentum = 0.3,
                   int _repetitions = 3,
                   bool _multithreading = true);

         /*! \brief d'tor
          *
          *
          */
         virtual ~Settings ();


         /*! \brief set the drop-out configuration (layer-wise)
          *
          * \param begin begin of an array or vector denoting the drop-out probabilities for each layer
          * \param end end of an array or vector denoting the drop-out probabilities for each layer
          * \param _dropRepetitions denotes after how many repetitions the drop-out setting (which nodes are dropped out exactly) is changed
          */
         template <typename Iterator>
         void setDropOut (Iterator begin, Iterator end, size_t _dropRepetitions) { m_dropOut.assign (begin, end); m_dropRepetitions = _dropRepetitions; }

         size_t dropRepetitions () const { return m_dropRepetitions; }
         const std::vector<double>& dropFractions () const { return m_dropOut; }

         void setMonitoring (std::shared_ptr<Monitoring> ptrMonitoring) { fMonitoring = ptrMonitoring; } ///< prepared for monitoring

         size_t convergenceSteps () const { return m_convergenceSteps; } ///< how many steps until training is deemed to have converged
         size_t batchSize () const { return m_batchSize; } ///< mini-batch size
         size_t testRepetitions () const { return m_testRepetitions; } ///< how often is the test data tested
         double factorWeightDecay () const { return m_factorWeightDecay; } ///< get the weight-decay factor

         double learningRate () const { return fLearningRate; } ///< get the learning rate
         double momentum () const { return fMomentum; } ///< get the momentum (e.g. for SGD)
         int repetitions () const { return fRepetitions; } ///< how many steps are taken until the mini-batch is changed
         MinimizerType minimizerType () const { return fMinimizerType; } ///< which minimizer shall be used (e.g. SGD)



         virtual void testSample (double /*error*/, double /*output*/, double /*target*/, double /*weight*/) {} ///< virtual function to be used for monitoring (callback)
         virtual void startTrainCycle () ///< callback for monitoring and logging
         {
            m_convergenceCount = 0;
            m_maxConvergenceCount = 0;
            m_minError = 1e10;
         }
         virtual void endTrainCycle (double /*error*/) {} ///< callback for monitoring and logging

         virtual void setProgressLimits (double minProgress = 0, double maxProgress = 100) ///< for monitoring and logging (set the current "progress" limits for the display of the progress)
         {
            m_minProgress = minProgress;
            m_maxProgress = maxProgress;
         }
         virtual void startTraining () ///< start drawing the progress bar
         {
            m_timer.DrawProgressBar (Int_t(m_minProgress), "");
         }
         virtual void cycle (double progress, TString text) ///< advance on the progress bar
         {
            m_timer.DrawProgressBar (Int_t(m_minProgress + (m_maxProgress - m_minProgress) * (progress / 100.0)), text);
         }

         virtual void startTestCycle () {} ///< callback for monitoring and logging
         virtual void endTestCycle () {} ///< callback for monitoring and logging
         virtual void testIteration () {} ///< callback for monitoring and logging
         virtual void drawSample (const std::vector<double>& /*input*/, const std::vector<double>& /* output */, const std::vector<double>& /* target */, double /* patternWeight */) {} ///< callback for monitoring and logging

         virtual void computeResult (const Net& /* net */, std::vector<double>& /* weights */) {} ///< callback for monitoring and logging

         virtual bool hasConverged (double testError); ///< has this training converged already?

         EnumRegularization regularization () const { return m_regularization; } ///< returns the regularization used for the DNN

         bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on?


         void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring
         void create (std::string histoName, int bins, double min, double max) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max); } ///< for monitoring
         void create (std::string histoName, int bins, double min, double max, int bins2, double min2, double max2) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max, bins2, min2, max2); } ///< for monitoring
         void addPoint (std::string histoName, double x) { if (fMonitoring) fMonitoring->addPoint (histoName, x); } ///< for monitoring
         void addPoint (std::string histoName, double x, double y) { if (fMonitoring) fMonitoring->addPoint (histoName, x, y); } ///< for monitoring
         void plot (std::string histoName, std::string options, int pad, EColor color) { if (fMonitoring) fMonitoring->plot (histoName, options, pad, color); } ///< for monitoring
         void clear (std::string histoName) { if (fMonitoring) fMonitoring->clear (histoName); } ///< for monitoring
         bool exists (std::string histoName) { if (fMonitoring) return fMonitoring->exists (histoName); return false; } ///< for monitoring

         size_t convergenceCount () const { return m_convergenceCount; } ///< returns the current convergence count
         size_t maxConvergenceCount () const { return m_maxConvergenceCount; } ///< returns the max convergence count so far
         double minError () const { return m_minError; } ///< returns the smallest error so far

      public:
         Timer m_timer;        ///< timer for monitoring
         double m_minProgress; ///< current limits for the progress bar
         double m_maxProgress; ///< current limits for the progress bar


         size_t m_convergenceSteps; ///< number of steps without improvement to consider the DNN to have converged
         size_t m_batchSize;        ///< mini-batch size
         size_t m_testRepetitions;
         double m_factorWeightDecay;

         size_t count_E;
         size_t count_dE;
         size_t count_mb_E;
         size_t count_mb_dE;

         EnumRegularization m_regularization;

         size_t m_dropRepetitions;
         std::vector<double> m_dropOut;

         double fLearningRate;
         double fMomentum;
         int fRepetitions;
         MinimizerType fMinimizerType;

         size_t m_convergenceCount;
         size_t m_maxConvergenceCount;
         double m_minError;


      protected:
         bool m_useMultithreading;

         std::shared_ptr<Monitoring> fMonitoring;
      };
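
      // Illustrative sketch: creating a Settings object for a training run
      // (the values shown are just the documented defaults spelled out):
      //
      //    Settings settings ("sketch",
      //                       /*_convergenceSteps=*/  15,
      //                       /*_batchSize=*/         10,
      //                       /*_testRepetitions=*/   7,
      //                       /*_factorWeightDecay=*/ 1e-5,
      //                       TMVA::DNN::EnumRegularization::NONE);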



      /*! \brief Settings for classification
       *
       * contains additional settings if the DNN problem is classification
       */
      class ClassificationSettings : public Settings
      {
      public:
         /*! \brief c'tor
          *
          *
          */
         ClassificationSettings (TString name,
                                 size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
                                 double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE,
                                 size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest,
                                 double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3,
                                 bool _useMultithreading = true)
            : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay,
                        _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading)
            , m_ams ()
            , m_sumOfSigWeights (0)
            , m_sumOfBkgWeights (0)
            , m_scaleToNumEvents (_scaleToNumEvents)
            , m_cutValue (10.0)
            , m_pResultPatternContainer (nullptr)
            , m_fileNameResult ()
            , m_fileNameNetConfig ()
         {
         }

         /*! \brief d'tor
          *
          *
          */
         virtual ~ClassificationSettings ()
         {
         }

         void startTrainCycle ();
         void endTrainCycle (double /*error*/);
         void testIteration () { if (fMonitoring) fMonitoring->ProcessEvents (); }


         /* void createHistograms () */
         /* { */
         /*     std::cout << "is hist ROC existing?" << std::endl; */
         /*     if (m_histROC) */
         /*     { */
         /*         std::cout << "--> yes" << std::endl; */
         /*         fMonitoring->ProcessEvents (); */
         /*         return; */
         /*     } */

         /*     std::cout << "create histograms" << std::endl; */
         /*     TCanvas* canvas = fMonitoring->GetCanvas (); */
         /*     if (canvas) */
         /*     { */
         /*         std::cout << "canvas divide" << std::endl; */
         /*         canvas->cd (); */
         /*         canvas->Divide (2,2); */
         /*     } */
         /*     if (!m_histROC) */
         /*     { */
         /*         m_histROC = new TH2F ("ROC","ROC", 1000, 0, 1.0, 1000, 0, 1.0); m_histROC->SetDirectory (0); */
         /*         m_histROC->SetLineColor (kBlue); */
         /*     } */
         /*     if (!m_histSignificance) */
         /*     { */
         /*         m_histSignificance = new TH2F ("Significance", "Significance", 1000, 0,1.0, 5, 0.0, 2.0); */
         /*         m_histSignificance->SetDirectory (0); */
         /*         m_histSignificance->SetBit (TH1::kCanRebin); */
         /*         m_histROC->SetLineColor (kRed); */
         /*     } */
         /*     if (!m_histError) */
         /*     { */
         /*         m_histError = new TH1F ("Error", "Error", 100, 0, 100); */
         /*         m_histError->SetDirectory (0); */
         /*         m_histError->SetBit (TH1::kCanRebin); */
         /*         m_histROC->SetLineColor (kGreen); */
         /*     } */
         /*     if (!m_histOutputSignal) */
         /*     { */
         /*         m_histOutputSignal = new TH1F ("Signal", "Signal", 100, 0, 1.0); */
         /*         m_histOutputSignal->SetDirectory (0); */
         /*         m_histOutputSignal->SetBit (TH1::kCanRebin); */
         /*     } */
         /*     if (!m_histOutputBackground) */
         /*     { */
         /*         m_histOutputBackground = new TH1F ("Background", "Background", 100, 0, 1.0); */
         /*         m_histOutputBackground->SetDirectory (0); */
         /*         m_histOutputBackground->SetBit (TH1::kCanRebin); */
         /*     } */

         /*     fMonitoring->ProcessEvents (); */
         /* } */

         void testSample (double error, double output, double target, double weight);

         virtual void startTestCycle ();
         virtual void endTestCycle ();


         void setWeightSums (double sumOfSigWeights, double sumOfBkgWeights);
         void setResultComputation (std::string _fileNameNetConfig, std::string _fileNameResult, std::vector<Pattern>* _resultPatternContainer);

         std::vector<double> m_input;
         std::vector<double> m_output;
         std::vector<double> m_targets;
         std::vector<double> m_weights;

         std::vector<double> m_ams;
         std::vector<double> m_significances;


         double m_sumOfSigWeights;
         double m_sumOfBkgWeights;
         size_t m_scaleToNumEvents;

         double m_cutValue;
         std::vector<Pattern>* m_pResultPatternContainer;
         std::string m_fileNameResult;
         std::string m_fileNameNetConfig;


         /* TH2F* m_histROC; */
         /* TH2F* m_histSignificance; */

         /* TH1F* m_histError; */
         /* TH1F* m_histOutputSignal; */
         /* TH1F* m_histOutputBackground; */
      };






      /// used to distinguish between different function signatures
      enum class ModeOutput
      {
         FETCH
      };

      /*! \brief error functions to be chosen from
       *
       *
       */
      enum class ModeErrorFunction
      {
         SUMOFSQUARES = 'S',
         CROSSENTROPY = 'C',
         CROSSENTROPY_MUTUALEXCLUSIVE = 'M'
      };

      /*! \brief weight initialization strategies to be chosen from
       *
       *
       */
      enum class WeightInitializationStrategy
      {
         XAVIER, TEST, LAYERSIZE, XAVIERUNIFORM
      };
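
      // Background note (general technique, not a statement about the exact
      // scheme in NeuralNet.icc): XAVIER-style initialization draws each weight
      // from a distribution whose width shrinks with the number of incoming
      // nodes, commonly sigma ~ sqrt(1/n_in), which keeps node activations in
      // the non-saturated range of tanh/sigmoid at the start of the training.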



      /*! \brief neural net
       *
       * holds the structure of all layers and some data for the whole net
       * does not know the layer data though (i.e. values of the nodes and weights)
       */
      class Net
      {
      public:

         typedef std::vector<double> container_type;
         typedef container_type::iterator iterator_type;
         typedef std::pair<iterator_type,iterator_type> begin_end_type;


         /*! \brief c'tor
          *
          *
          */
         Net ()
            : m_eErrorFunction (ModeErrorFunction::SUMOFSQUARES)
            , m_sizeInput (0)
            , m_layers ()
         {
         }

         /*! \brief copy c'tor
          *
          *
          */
         Net (const Net& other)
            : m_eErrorFunction (other.m_eErrorFunction)
            , m_sizeInput (other.m_sizeInput)
            , m_layers (other.m_layers)
         {
         }

         void setInputSize (size_t sizeInput) { m_sizeInput = sizeInput; } ///< set the input size of the DNN
         void setOutputSize (size_t sizeOutput) { m_sizeOutput = sizeOutput; } ///< set the output size of the DNN
         void addLayer (Layer& layer) { m_layers.push_back (layer); } ///< add a layer (layout)
         void addLayer (Layer&& layer) { m_layers.push_back (layer); }
         void setErrorFunction (ModeErrorFunction eErrorFunction) { m_eErrorFunction = eErrorFunction; } ///< which error function is to be used

         size_t inputSize () const { return m_sizeInput; } ///< input size of the DNN
         size_t outputSize () const { return m_sizeOutput; } ///< output size of the DNN

         /*! \brief set the drop-out configuration
          *
          *
          */
         template <typename WeightsType, typename DropProbabilities>
         void dropOutWeightFactor (WeightsType& weights,
                                   const DropProbabilities& drops,
                                   bool inverse = false);

         /*! \brief start the training
          *
          * \param weights weight vector
          * \param trainPattern training pattern
          * \param testPattern test pattern
          * \param minimizer use this minimizer for training (e.g. SGD)
          * \param settings settings used for this training run
          */
         template <typename Minimizer>
         double train (std::vector<double>& weights,
                       std::vector<Pattern>& trainPattern,
                       const std::vector<Pattern>& testPattern,
                       Minimizer& minimizer,
                       Settings& settings);

         /*! \brief pre-training for future use
          *
          *
          */
         template <typename Minimizer>
         void preTrain (std::vector<double>& weights,
                        std::vector<Pattern>& trainPattern,
                        const std::vector<Pattern>& testPattern,
                        Minimizer& minimizer, Settings& settings);


         /*! \brief executes one training cycle
          *
          * \param minimizer the minimizer to be used
          * \param weights the weight vector to be used
          * \param itPatternBegin begin of the patterns to be trained with
          * \param itPatternEnd end of the patterns to be trained with
          * \param settings the settings for the training
          * \param dropContainer the configuration for DNN drop-out
          */
         template <typename Iterator, typename Minimizer>
         inline double trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                                   Iterator itPatternBegin, Iterator itPatternEnd,
                                   Settings& settings,
                                   DropContainer& dropContainer);

         size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net
         size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net

         template <typename Weights>
         std::vector<double> compute (const std::vector<double>& input, const Weights& weights) const; ///< compute the net with the given input and the given weights

         template <typename Weights, typename PassThrough>
         double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients

         template <typename Weights, typename PassThrough, typename OutContainer>
         double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function

         template <typename Weights, typename Gradients, typename PassThrough>
         double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well

         template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
         double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const;


         template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
         std::vector<std::vector<LayerData>> prepareLayerData (LayerContainer& layers,
                                                               Batch& batch,
                                                               const DropContainer& dropContainer,
                                                               ItWeight itWeightBegin,
                                                               ItWeight itWeightEnd,
                                                               ItGradient itGradientBegin,
                                                               ItGradient itGradientEnd,
                                                               size_t& totalNumWeights) const;

         template <typename LayerContainer>
         void forwardPattern (const LayerContainer& _layers,
                              std::vector<LayerData>& layerData) const;


         template <typename LayerContainer, typename LayerPatternContainer>
         void forwardBatch (const LayerContainer& _layers,
                            LayerPatternContainer& layerPatternData,
                            std::vector<double>& valuesMean,
                            std::vector<double>& valuesStdDev,
                            size_t trainFromLayer) const;

         template <typename OutputContainer>
         void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const;

         template <typename OutputContainer>
         void fetchOutput (const std::vector<LayerData>& layerPatternData, OutputContainer& outputContainer) const;


         template <typename ItWeight>
         std::tuple</*sumError*/double,/*sumWeights*/double> computeError (const Settings& settings,
                                                                           std::vector<LayerData>& lastLayerData,
                                                                           Batch& batch,
                                                                           ItWeight itWeightBegin,
                                                                           ItWeight itWeightEnd) const;

         template <typename Settings>
         void backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                             const Settings& settings,
                             size_t trainFromLayer,
                             size_t totalNumWeights) const;


         /*! \brief main NN computation function
          *
          *
          */
         template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
         double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch,
                                  ItWeight itWeightBegin, ItWeight itWeightEnd,
                                  ItGradient itGradientBegin, ItGradient itGradientEnd,
                                  size_t trainFromLayer,
                                  OutContainer& outputContainer, bool fetchOutput) const;



         double E ();
         void dE ();


         /*! \brief computes the error of the DNN
          *
          *
          */
         template <typename Container, typename ItWeight>
         double errorFunction (LayerData& layerData,
                               Container truth,
                               ItWeight itWeight,
                               ItWeight itWeightEnd,
                               double patternWeight,
                               double factorWeightDecay,
                               EnumRegularization eRegularization) const;


         const std::vector<Layer>& layers () const { return m_layers; } ///< returns the layers (structure)
         std::vector<Layer>& layers () { return m_layers; } ///< returns the layers (structure)

         void removeLayer () { m_layers.pop_back (); } ///< remove one layer


         void clear () ///< clear the net (remove all layers)
         {
            m_layers.clear ();
            m_eErrorFunction = ModeErrorFunction::SUMOFSQUARES;
         }


         template <typename OutIterator>
         void initializeWeights (WeightInitializationStrategy eInitStrategy,
                                 OutIterator itWeight); ///< initialize the weights with the given strategy

      protected:

         void fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const; ///< prepare the drop-out container (select the nodes which are to be dropped out)


      private:

         ModeErrorFunction m_eErrorFunction; ///< denotes the error function
         size_t m_sizeInput;  ///< input size of this DNN
         size_t m_sizeOutput; ///< output size of this DNN
         std::vector<Layer> m_layers; ///< layer-structure-data

      protected:
         // variables for JsMVA (interactive training in jupyter notebook)
         IPythonInteractive* fInteractive = nullptr;
         bool* fExitFromTraining = nullptr;
         UInt_t *fIPyMaxIter = nullptr, *fIPyCurrentIter = nullptr;

      public:

         // setup ipython interactive variables
         void SetIpythonInteractive (IPythonInteractive* fI, bool* fE, UInt_t* M, UInt_t* C)
         {
            fInteractive = fI;
            fExitFromTraining = fE;
            fIPyMaxIter = M;
            fIPyCurrentIter = C;
         }
      };
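
      // Illustrative sketch of putting the pieces together (a minimal training
      // setup; "trainPatterns"/"testPatterns" are hypothetical containers of
      // type std::vector<Pattern>, and all sizes are arbitrary examples):
      //
      //    Net net;
      //    net.setInputSize (4);
      //    net.setOutputSize (1);
      //    net.addLayer (Layer (8, EnumFunction::TANH));
      //    net.addLayer (Layer (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID));
      //    net.setErrorFunction (ModeErrorFunction::CROSSENTROPY);
      //
      //    std::vector<double> weights;
      //    weights.reserve (net.numWeights ());
      //    net.initializeWeights (WeightInitializationStrategy::XAVIER,
      //                           std::back_inserter (weights));
      //
      //    Steepest minimizer;
      //    ClassificationSettings settings ("sketch");
      //    net.train (weights, trainPatterns, testPatterns, minimizer, settings);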




      typedef std::tuple<Settings&, Batch&, DropContainer&> pass_through_type;




   } // namespace DNN
} // namespace TMVA


// include the implementations (in the header file, because they are templated)
#include "TMVA/NeuralNet.icc"

#endif
double
Definition: Converters.cxx:921
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
#define e(i)
Definition: RSha256.hxx:103
#define NONE
Definition: Rotated.cxx:52
int Int_t
Definition: RtypesCore.h:43
EColor
Definition: Rtypes.h:63
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
double sqrt(double)
Double_t(* Function)(Double_t)
Definition: Functor.C:4
The Batch class encapsulates one mini-batch.
Definition: NeuralNet.h:234
const_iterator m_itEnd
iterator denoting the end of the batch
Definition: NeuralNet.h:250
const_iterator begin() const
Definition: NeuralNet.h:243
const_iterator end() const
Definition: NeuralNet.h:244
Batch(typename std::vector< Pattern >::const_iterator itBegin, typename std::vector< Pattern >::const_iterator itEnd)
Definition: NeuralNet.h:238
size_t size() const
Definition: NeuralNet.h:246
std::vector< Pattern >::const_iterator const_iterator
Definition: NeuralNet.h:236
const_iterator m_itBegin
iterator denoting the beginning of the batch
Definition: NeuralNet.h:249
Settings for classificationused to distinguish between different function signatures.
Definition: NeuralNet.h:900
void startTrainCycle()
action to be done when the training cycle is started (e.g.
Definition: NeuralNet.cxx:281
void testIteration()
callback for monitoring and loggging
Definition: NeuralNet.h:935
void endTrainCycle(double)
action to be done when the training cycle is ended (e.g.
Definition: NeuralNet.cxx:296
virtual void endTestCycle()
action to be done when the training cycle is ended (e.g.
Definition: NeuralNet.cxx:326
std::vector< Pattern > * m_pResultPatternContainer
Definition: NeuralNet.h:1014
void setResultComputation(std::string _fileNameNetConfig, std::string _fileNameResult, std::vector< Pattern > *_resultPatternContainer)
preparation for monitoring output
Definition: NeuralNet.cxx:523
ClassificationSettings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, EnumRegularization _regularization=EnumRegularization::NONE, size_t _scaleToNumEvents=0, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _useMultithreading=true)
c'tor
Definition: NeuralNet.h:906
std::vector< double > m_input
Definition: NeuralNet.h:1000
std::vector< double > m_significances
Definition: NeuralNet.h:1006
std::vector< double > m_weights
Definition: NeuralNet.h:1003
virtual ~ClassificationSettings()
d'tor
Definition: NeuralNet.h:929
std::vector< double > m_targets
Definition: NeuralNet.h:1002
void testSample(double error, double output, double target, double weight)
action to be done after the computation of a test sample (e.g.
Definition: NeuralNet.cxx:304
virtual void startTestCycle()
action to be done when the test cycle is started (e.g.
Definition: NeuralNet.cxx:316
void setWeightSums(double sumOfSigWeights, double sumOfBkgWeights)
set the weight sums to be scaled to (preparations for monitoring output)
Definition: NeuralNet.cxx:515
std::vector< double > m_ams
Definition: NeuralNet.h:1005
std::vector< double > m_output
Definition: NeuralNet.h:1001
LayerData holds the data of one layer.
Definition: NeuralNet.h:436
const_iterator_type m_itInputBegin
iterator to the first of the nodes in the input node vector
Definition: NeuralNet.h:641
const_iterator_type deltasBegin() const
returns const iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:596
iterator_type valuesBegin()
returns iterator to the begin of the (node) values
Definition: NeuralNet.h:587
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition: NeuralNet.h:585
bool m_hasGradients
does this layer have gradients (only if in training mode)
Definition: NeuralNet.h:658
std::vector< double > m_deltas
stores the deltas for the DNN training
Definition: NeuralNet.h:644
container_type::iterator iterator_type
Definition: NeuralNet.h:440
LayerData(const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput=ModeOutputValues::DIRECT)
c'tor of LayerData
Definition: NeuralNet.cxx:81
void setDropOut(Iterator itDrop)
set the drop-out info for this layer
Definition: NeuralNet.h:616
void setInput(const_iterator_type itInputBegin, const_iterator_type itInputEnd)
change the input iterators
Definition: NeuralNet.h:567
std::vector< std::function< double(double)> > function_container_type
Definition: NeuralNet.h:443
iterator_type valuesEnd()
returns iterator to the end of the (node) values
Definition: NeuralNet.h:588
const_dropout_iterator m_itDropOut
iterator to a container indicating if the corresponding node is to be dropped
Definition: NeuralNet.h:647
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition: NeuralNet.h:599
iterator_type gradientsBegin()
returns iterator to the begin of the gradients
Definition: NeuralNet.h:605
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:593
bool m_hasWeights
does this layer have weights (it does not if it is the input layer)
Definition: NeuralNet.h:657
const_dropout_iterator dropOut() const
return the begin of the drop-out information
Definition: NeuralNet.h:625
LayerData(LayerData &&other)
move c'tor of LayerData
Definition: NeuralNet.h:540
std::vector< double > container_type
Definition: NeuralNet.h:438
size_t size() const
return the size of the layer
Definition: NeuralNet.h:627
const_iterator_type weightsBegin() const
returns const iterator to the begin of the weights for this layer
Definition: NeuralNet.h:607
function_container_type::const_iterator const_function_iterator_type
Definition: NeuralNet.h:445
LayerData(const LayerData &other)
copy c'tor of LayerData
Definition: NeuralNet.h:517
function_container_type::iterator function_iterator_type
Definition: NeuralNet.h:444
std::vector< double > m_values
stores the values of the nodes in this layer
Definition: NeuralNet.h:646
const_iterator_type m_itInputEnd
iterator to the end of the nodes in the input node vector
Definition: NeuralNet.h:642
container_type::const_iterator const_iterator_type
Definition: NeuralNet.h:441
ModeOutputValues outputMode() const
returns the output mode
Definition: NeuralNet.h:590
iterator_type m_itGradientBegin
iterator to the first gradient of this layer in the gradient vector
Definition: NeuralNet.h:651
const_iterator_type gradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:606
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition: NeuralNet.h:610
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:594
std::vector< double > m_valueGradients
stores the gradients of the values (nodes)
Definition: NeuralNet.h:645
const_iterator_type m_itConstWeightBegin
const iterator to the first weight of this layer in the weight vector
Definition: NeuralNet.h:650
iterator_type valueGradientsEnd()
returns iterator to the end of the gradients of the node values
Definition: NeuralNet.h:600
void clear()
clear the values and the deltas
Definition: NeuralNet.h:578
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition: NeuralNet.h:609
container_type computeProbabilities() const
compute the probabilities from the node values
Definition: NeuralNet.cxx:140
const_iterator_type deltasEnd() const
returns const iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:597
bool m_hasDropOut
dropOut is turned on?
Definition: NeuralNet.h:648
bool m_isInputLayer
is this layer an input layer
Definition: NeuralNet.h:656
bool hasDropOut() const
has this layer drop-out turned on?
Definition: NeuralNet.h:624
const_iterator_type valueGradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:602
const_iterator_type valueGradientsEnd() const
returns const iterator to the end of the gradients
Definition: NeuralNet.h:603
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition: NeuralNet.h:591
void clearDropOut()
clear the drop-out-data for this layer
Definition: NeuralNet.h:622
ModeOutputValues m_eModeOutput
stores the output mode (DIRECT, SIGMOID, SOFTMAX)
Definition: NeuralNet.h:660
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
inverse activation function for this layer
Definition: NeuralNet.h:654
DropContainer::const_iterator const_dropout_iterator
Definition: NeuralNet.h:447
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition: NeuralNet.h:584
std::shared_ptr< std::function< double(double)> > m_activationFunction
activation function for this layer
Definition: NeuralNet.h:653
Layer defines the layout of a layer.
Definition: NeuralNet.h:675
void modeOutputValues(ModeOutputValues eModeOutputValues)
set the mode-output-value
Definition: NeuralNet.h:688
std::shared_ptr< std::function< double(double)> > m_activationFunction
stores the activation function
Definition: NeuralNet.h:701
std::shared_ptr< std::function< double(double)> > activationFunction() const
fetch the activation function for this layer
Definition: NeuralNet.h:693
size_t m_numNodes
Definition: NeuralNet.h:705
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
stores the inverse activation function
Definition: NeuralNet.h:702
size_t numNodes() const
return the number of nodes of this layer
Definition: NeuralNet.h:690
ModeOutputValues m_eModeOutputValues
do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned...
Definition: NeuralNet.h:707
size_t numWeights(size_t numInputNodes) const
return the number of weights for this layer (fully connected)
Definition: NeuralNet.h:691
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
fetch the inverse activation function for this layer
Definition: NeuralNet.h:694
EnumFunction m_activationFunctionType
Definition: NeuralNet.h:708
Layer(size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues=ModeOutputValues::DIRECT)
c'tor for defining a Layer
Definition: NeuralNet.cxx:166
EnumFunction activationFunctionType() const
get the activation function type for this layer
Definition: NeuralNet.h:696
ModeOutputValues modeOutputValues() const
get the mode-output-value (direct, probabilities)
Definition: NeuralNet.h:687
double mean() const
Definition: NeuralNet.h:125
double var_corr() const
Definition: NeuralNet.h:135
void add(T value, double weight=1.0)
Definition: NeuralNet.h:92
double stdDev_corr() const
Definition: NeuralNet.h:143
double weights() const
Definition: NeuralNet.h:124
void add(ITERATOR itBegin, ITERATOR itEnd)
Definition: NeuralNet.h:115
double var() const
Definition: NeuralNet.h:126
double stdDev() const
Definition: NeuralNet.h:144
neural net
Definition: NeuralNet.h:1067
void setInputSize(size_t sizeInput)
set the input size of the DNN
Definition: NeuralNet.h:1097
std::vector< Layer > & layers()
returns the layers (structure)
Definition: NeuralNet.h:1251
void forwardBatch(const LayerContainer &_layers, LayerPatternContainer &layerPatternData, std::vector< double > &valuesMean, std::vector< double > &valuesStdDev, size_t trainFromLayer) const
Definition: NeuralNet.icc:1245
Net(const Net &other)
d'tor
Definition: NeuralNet.h:1090
bool * fExitFromTraining
Definition: NeuralNet.h:1282
std::vector< Layer > m_layers
layer-structure-data
Definition: NeuralNet.h:1277
UInt_t * fIPyMaxIter
Definition: NeuralNet.h:1283
void SetIpythonInteractive(IPythonInteractive *fI, bool *fE, UInt_t *M, UInt_t *C)
Definition: NeuralNet.h:1288
std::vector< double > compute(const std::vector< double > &input, const Weights &weights) const
compute the net with the given input and the given weights
Definition: NeuralNet.icc:1039
std::vector< double > container_type
Definition: NeuralNet.h:1070
container_type::iterator iterator_type
Definition: NeuralNet.h:1071
void preTrain(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
pre-training for future use
void fetchOutput(const LayerData &lastLayerData, OutputContainer &outputContainer) const
Definition: NeuralNet.icc:1300
size_t inputSize() const
input size of the DNN
Definition: NeuralNet.h:1103
std::pair< iterator_type, iterator_type > begin_end_type
Definition: NeuralNet.h:1072
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition: NeuralNet.h:1274
void addLayer(Layer &&layer)
Definition: NeuralNet.h:1100
size_t numNodes(size_t trainingStartLayer=0) const
returns the number of nodes in this net
Definition: NeuralNet.cxx:559
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
Definition: NeuralNet.icc:712
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition: NeuralNet.h:1250
std::vector< std::vector< LayerData > > prepareLayerData(LayerContainer &layers, Batch &batch, const DropContainer &dropContainer, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t &totalNumWeights) const
Definition: NeuralNet.icc:1113
void setErrorFunction(ModeErrorFunction eErrorFunction)
which error function is to be used
Definition: NeuralNet.h:1101
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
Definition: NeuralNet.icc:1483
size_t outputSize() const
output size of the DNN
Definition: NeuralNet.h:1104
double errorFunction(LayerData &layerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const
computes the error of the DNN
Definition: NeuralNet.icc:1592
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
Definition: NeuralNet.icc:1418
void removeLayer()
remove one layer
Definition: NeuralNet.h:1253
size_t m_sizeOutput
outut size of this DNN
Definition: NeuralNet.h:1276
size_t m_sizeInput
input size of this DNN
Definition: NeuralNet.h:1275
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
Definition: NeuralNet.icc:941
UInt_t * fIPyCurrentIter
Definition: NeuralNet.h:1283
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradient...
Definition: NeuralNet.icc:1072
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop out configuration
Definition: NeuralNet.icc:652
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out container (select the nodes which are to be dropped out)
Definition: NeuralNet.cxx:575
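For illustration, assuming a configured net (DropContainer is the std::vector<char> typedef listed further below):

   TMVA::DNN::DropContainer drop;                     // one on/off flag per node
   net.fillDropContainer (drop, /*dropFraction=*/ 0.5, /*numNodes=*/ 8);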
void addLayer(Layer &layer)
add a layer (layout)
Definition: NeuralNet.h:1099
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
Definition: NeuralNet.cxx:543
IPythonInteractive * fInteractive
Definition: NeuralNet.h:1281
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
Definition: NeuralNet.icc:1330
void setOutputSize(size_t sizeOutput)
set the output size of the DNN
Definition: NeuralNet.h:1098
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
Definition: NeuralNet.icc:1225
void backPropagate(std::vector< std::vector< LayerData > > &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Definition: NeuralNet.icc:1367
Settings for the training of the neural net.
Definition: NeuralNet.h:735
size_t m_batchSize
mini-batch size
Definition: NeuralNet.h:843
void setDropOut(Iterator begin, Iterator end, size_t _dropRepetitions)
set the drop-out configuration (layer-wise)
Definition: NeuralNet.h:764
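A sketch, assuming a Settings instance named settings; the fractions are illustrative, one entry per layer:

   std::vector<double> dropFractions = {0.0, 0.5, 0.5};   // e.g. keep all input nodes, drop 50% in the hidden layers
   settings.setDropOut (dropFractions.begin (), dropFractions.end (),
                        /*_dropRepetitions=*/ 1);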
void create(std::string histoName, int bins, double min, double max, int bins2, double min2, double max2)
for monitoring
Definition: NeuralNet.h:825
bool useMultithreading() const
is multithreading turned on?
Definition: NeuralNet.h:820
EnumRegularization regularization() const
returns which regularization (if any) is applied to the DNN
Definition: NeuralNet.h:818
size_t convergenceCount() const
returns the current convergence count
Definition: NeuralNet.h:832
double momentum() const
get the momentum (e.g. for SGD)
Definition: NeuralNet.h:777
Timer m_timer
timer for monitoring
Definition: NeuralNet.h:837
size_t testRepetitions() const
how often the test data is evaluated
Definition: NeuralNet.h:773
void clear(std::string histoName)
for monitoring
Definition: NeuralNet.h:829
virtual void endTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:810
MinimizerType fMinimizerType
Definition: NeuralNet.h:860
void addPoint(std::string histoName, double x, double y)
for monitoring
Definition: NeuralNet.h:827
void setMonitoring(std::shared_ptr< Monitoring > ptrMonitoring)
prepared for monitoring
Definition: NeuralNet.h:769
virtual void testIteration()
callback for monitoring and logging
Definition: NeuralNet.h:811
size_t m_convergenceSteps
number of steps without improvement to consider the DNN to have converged
Definition: NeuralNet.h:842
virtual bool hasConverged(double testError)
has this training converged already?
Definition: NeuralNet.cxx:488
MinimizerType minimizerType() const
which minimizer shall be used (e.g. SGD)
Definition: NeuralNet.h:779
std::vector< double > m_dropOut
Definition: NeuralNet.h:855
double m_minProgress
current limits for the progress bar
Definition: NeuralNet.h:838
virtual void cycle(double progress, TString text)
Definition: NeuralNet.h:804
Settings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, TMVA::DNN::EnumRegularization _regularization=TMVA::DNN::EnumRegularization::NONE, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _multithreading=true)
c'tor
Definition: NeuralNet.cxx:232
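All arguments after the name carry the defaults shown in the signature above; a sketch that overrides a few of them:

   TMVA::DNN::Settings settings ("myTraining",
                                 /*_convergenceSteps=*/  20,
                                 /*_batchSize=*/         32,
                                 /*_testRepetitions=*/    7,
                                 /*_factorWeightDecay=*/ 1e-4,
                                 TMVA::DNN::EnumRegularization::L2);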
virtual void setProgressLimits(double minProgress=0, double maxProgress=100)
Definition: NeuralNet.h:795
double m_maxProgress
current limits for the progress bar
Definition: NeuralNet.h:839
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition: NeuralNet.h:793
virtual void drawSample(const std::vector< double > &, const std::vector< double > &, const std::vector< double > &, double)
callback for monitoring and logging
Definition: NeuralNet.h:812
double learningRate() const
get the learning rate
Definition: NeuralNet.h:776
double m_dropRepetitions
Definition: NeuralNet.h:854
const std::vector< double > & dropFractions() const
Definition: NeuralNet.h:767
void addPoint(std::string histoName, double x)
for monitoring
Definition: NeuralNet.h:826
virtual ~Settings()
d'tor
Definition: NeuralNet.cxx:261
size_t m_convergenceCount
Definition: NeuralNet.h:862
EnumRegularization m_regularization
Definition: NeuralNet.h:852
int repetitions() const
how many steps are taken on one batch before switching to the next
Definition: NeuralNet.h:778
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition: NeuralNet.h:786
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition: NeuralNet.h:828
virtual void startTrainCycle()
Definition: NeuralNet.h:787
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition: NeuralNet.h:771
double m_factorWeightDecay
Definition: NeuralNet.h:845
double factorWeightDecay() const
get the weight-decay factor
Definition: NeuralNet.h:774
bool exists(std::string histoName)
for monitoring
Definition: NeuralNet.h:830
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition: NeuralNet.h:833
void pads(int numPads)
preparation for monitoring
Definition: NeuralNet.h:823
size_t m_testRepetitions
Definition: NeuralNet.h:844
size_t batchSize() const
mini-batch size
Definition: NeuralNet.h:772
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and logging
Definition: NeuralNet.h:814
std::shared_ptr< Monitoring > fMonitoring
Definition: NeuralNet.h:870
size_t dropRepetitions() const
Definition: NeuralNet.h:766
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition: NeuralNet.h:824
size_t minError() const
returns the smallest error so far
Definition: NeuralNet.h:834
virtual void startTraining()
Definition: NeuralNet.h:800
size_t m_maxConvergenceCount
Definition: NeuralNet.h:863
virtual void startTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:809
Steepest Gradient Descent algorithm (SGD)
Definition: NeuralNet.h:333
double m_beta
internal parameter (momentum)
Definition: NeuralNet.h:371
std::vector< double > m_localGradients
local gradients for reuse within a thread.
Definition: NeuralNet.h:375
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition: NeuralNet.h:372
double m_alpha
internal parameter (learningRate)
Definition: NeuralNet.h:370
std::vector< double > m_localWeights
local weights for reuse within a thread.
Definition: NeuralNet.h:374
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
Definition: NeuralNet.icc:271
Steepest(double learningRate=1e-4, double momentum=0.5, size_t repetitions=10)
c'tor
Definition: NeuralNet.h:347
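The documented defaults made explicit; the instance is then passed as the minimizer argument of Net::train:

   TMVA::DNN::Steepest minimizer (/*learningRate=*/ 1e-4,
                                  /*momentum=*/     0.5,
                                  /*repetitions=*/  10);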
This class is needed by JsMVA; it is a helper class for tracking errors during training in Jupyter.
Definition: MethodBase.h:94
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
void DrawProgressBar(Int_t, const TString &comment="")
draws the progress bar in color or B&W
Definition: Timer.cxx:203
Basic string class.
Definition: TString.h:131
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
sum-of-squares error function
double uniformDouble(double minValue, double maxValue)
Definition: NeuralNet.cxx:43
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and activation functions) in the forward direction of the DNN
Definition: NeuralNet.icc:546
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
ModeOutputValues operator|(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:187
EnumRegularization
Definition: NeuralNet.h:172
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross-entropy error function
Definition: NeuralNet.icc:412
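Up to normalization conventions, the per-pattern quantity such a function computes is the standard binary cross entropy (a sketch, with w_p the pattern weight, t_k the truth values, and p_k the predicted probabilities):

   E = -w_p \sum_k \left[ t_k \ln p_k + (1 - t_k) \ln (1 - p_k) \right]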
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
Definition: NeuralNet.icc:572
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
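The standard penalties such a function adds are, with \lambda = factorWeightDecay (the exact normalization used in NeuralNet.icc may differ):

   E_{L1} = E + \lambda \sum_i |w_i| \qquad E_{L2} = E + \frac{\lambda}{2} \sum_i w_i^2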
ModeOutputValues operator&=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:203
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:238
ModeErrorFunction
error functions to be chosen from
Definition: NeuralNet.h:1044
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max cross-entropy error function (for mutually exclusive classification)
Definition: NeuralNet.icc:458
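The mutually exclusive variant pairs a soft-max output with the cross entropy of the truth against the normalized probabilities; schematically:

   p_k = \frac{e^{o_k}}{\sum_j e^{o_j}} \qquad E = -w_p \sum_k t_k \ln p_k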
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition: NeuralNet.h:1055
ModeOutputValues operator|=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:192
MinimizerType
list of all the minimizer types
Definition: NeuralNet.h:320
@ fSteepest
SGD.
Definition: NeuralNet.h:321
double gaussDouble(double mean, double sigma)
Definition: NeuralNet.cxx:35
ModeOutputValues operator&(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:198
ModeOutputValues
Definition: NeuralNet.h:178
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition: NeuralNet.h:1299
bool isFlagSet(T flag, T value)
Definition: NeuralNet.h:211
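The ModeOutputValues operators above turn the enum into a bit mask; a short sketch:

   using TMVA::DNN::ModeOutputValues;
   ModeOutputValues mode = ModeOutputValues::SIGMOID | ModeOutputValues::BATCHNORMALIZATION;
   bool sigmoidOn = TMVA::DNN::isFlagSet (ModeOutputValues::SIGMOID, mode);   // true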
int randomInt(int maxValue)
Definition: NeuralNet.cxx:52
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
Definition: NeuralNet.icc:183
std::vector< char > DropContainer
Definition: NeuralNet.h:218
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)
apply the weights backwards (from the current layer to the previous layer)