NeuralNet.h
1 /**
2 * @file NeuralNet.h
3 * @author Peter Speckmayer
4 * @version 1.0
5 *
6 * @section LICENSE
7 *
8 *
9 * @section Neural net implementation
10 *
11 * An implementation of a neural net for TMVA. This neural net uses multithreading.
12 *
13 */
14
15
16 //////////////////////////////////////////////////////////////////////////
17 //                                                                      //
18 //                            NeuralNet                                 //
19 //                                                                      //
20 //                   A neural net implementation                        //
21 //                                                                      //
22 //////////////////////////////////////////////////////////////////////////
23
24 #ifndef TMVA_NEURAL_NET
25 #define TMVA_NEURAL_NET
26 #pragma once
27
28 #include <map>
29 #include <vector>
30 #include <iostream>
31 #include <fstream>
32 #include <algorithm>
33 #include <iterator>
34 #include <functional>
35 #include <tuple>
36 #include <cmath>
37 #include <cassert>
38 #include <random>
39 #include <thread>
40 #include <future>
41 #include <type_traits>
42
43 #include "Pattern.h"
44 #include "Monitoring.h"
45
46 #include "TApplication.h"
47 #include "Timer.h"
48
49 #include "TH1F.h"
50 #include "TH2F.h"
51 #include "TStyle.h"
52
53 #include <fenv.h> // turn on or off exceptions for NaN and other numeric exceptions
54
55
56namespace TMVA
57{
58
59 class IPythonInteractive;
60
61 namespace DNN
62 {
63
64 // double gaussDouble (double mean, double sigma);
65
66
67
68 double gaussDouble (double mean, double sigma);
69 double uniformDouble (double minValue, double maxValue);
70 int randomInt (int maxValue);
71
72
73
74
75 class MeanVariance
76 {
77 public:
78 MeanVariance()
79 : m_n(0)
80 , m_sumWeights(0)
81 , m_mean(0)
82 , m_squared(0)
83 {}
84
85 inline void clear()
86 {
87 m_n = 0;
88 m_sumWeights = 0;
89 m_mean = 0;
90 m_squared = 0;
91 }
92
93 template <typename T>
94 inline void add(T value, double weight = 1.0)
95 {
96 ++m_n; // a value has been added
97
98 if (m_n == 1) // initialization
99 {
100 m_mean = value;
101 m_squared = 0.0;
102 m_sumWeights = weight;
103 return;
104 }
105
106 double tmpWeight = m_sumWeights+weight;
107 double Q = value - m_mean;
108
109 double R = Q*weight/tmpWeight;
110 m_mean += R;
111 m_squared += m_sumWeights*R*Q;
112
113 m_sumWeights = tmpWeight;
114 }
115
116 template <typename ITERATOR>
117 inline void add (ITERATOR itBegin, ITERATOR itEnd)
118 {
119 for (ITERATOR it = itBegin; it != itEnd; ++it)
120 add (*it);
121 }
122
123
124
125 inline int count() const { return m_n; }
126 inline double weights() const { if(m_n==0) return 0; return m_sumWeights; }
127 inline double mean() const { if(m_n==0) return 0; return m_mean; }
128 inline double var() const
129 {
130 if(m_n==0)
131 return 0;
132 if (m_squared <= 0)
133 return 0;
134 return (m_squared/m_sumWeights);
135 }
136
137 inline double var_corr () const
138 {
139 if (m_n <= 1)
140 return var ();
141
142 return (var()*m_n/(m_n-1)); // unbiased for small sample sizes
143 }
144
145 inline double stdDev_corr () const { return sqrt( var_corr() ); }
146 inline double stdDev () const { return sqrt( var() ); } // biased estimator; no small-sample correction
147
148 private:
149 size_t m_n;
150 double m_sumWeights;
151 double m_mean;
152 double m_squared;
153 };
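
The class above is an incremental (single-pass) implementation of weighted mean and variance in the style of Welford/West: each add() shifts the running mean by R = Q*weight/tmpWeight and accumulates the weighted squared deviation, so the statistics are available at any time without a second pass over the data. A minimal usage sketch (editorial, not part of the header):

    MeanVariance stat;
    stat.add (1.0, 2.0);                  // value 1.0 entered with weight 2.0
    stat.add (3.0);                       // default weight 1.0
    stat.add (5.0, 0.5);
    double mean   = stat.mean ();         // weighted mean
    double sigma  = stat.stdDev_corr ();  // std. deviation with small-sample correction
    int    nAdded = stat.count ();        // 3
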
154
155
156
157 enum class EnumFunction
158 {
159 ZERO = '0',
160 LINEAR = 'L',
161 TANH = 'T',
162 RELU = 'R',
163 SYMMRELU = 'r',
164 TANHSHIFT = 't',
165 SIGMOID = 's',
166 SOFTSIGN = 'S',
167 GAUSS = 'G',
168 GAUSSCOMPLEMENT = 'C'
169 };
170
171
172
173 enum class EnumRegularization
174 {
175 NONE, L1, L2, L1MAX
176 };
177
178
179 enum class ModeOutputValues : int
180 {
181 DIRECT = 0x01,
182 SIGMOID = 0x02,
183 SOFTMAX = 0x04,
184 BATCHNORMALIZATION = 0x08
185 };
186
187
188
189 inline ModeOutputValues operator| (ModeOutputValues lhs, ModeOutputValues rhs)
190 {
191 return ModeOutputValues ((int)lhs | (int)rhs);
192 }
193
194 inline ModeOutputValues operator|= (ModeOutputValues& lhs, ModeOutputValues rhs)
195 {
196 lhs = ModeOutputValues ((int)lhs | (int)rhs);
197 return lhs;
198 }
199
200 inline ModeOutputValues operator& (ModeOutputValues lhs, ModeOutputValues rhs)
201 {
202 return ModeOutputValues ((int)lhs & (int)rhs);
203 }
204
205 inline ModeOutputValues operator&= (ModeOutputValues& lhs, ModeOutputValues rhs)
206 {
207 lhs = ModeOutputValues ((int)lhs & (int)rhs);
208 return lhs;
209 }
210
211
212 template <typename T>
213 bool isFlagSet (T flag, T value)
214 {
215 return (int)(value & flag) != 0;
216 }
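
ModeOutputValues is used as a bit mask, so several output transformations can be combined with the operators above and queried with isFlagSet. A short sketch (editorial):

    ModeOutputValues mode = ModeOutputValues::SIGMOID | ModeOutputValues::BATCHNORMALIZATION;
    bool hasSigmoid = isFlagSet (ModeOutputValues::SIGMOID, mode);  // true
    bool hasSoftmax = isFlagSet (ModeOutputValues::SOFTMAX, mode);  // false
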
217
218
219
220 class Net;
221
222
223
224
225
226
227
228 typedef std::vector<char> DropContainer;
229
230
231 /*! \brief The Batch class encapsulates one mini-batch
232 *
233 * Holds a const_iterator to the beginning and the end of one batch in a vector of Pattern
234 */
235 class Batch
236 {
237 public:
238 typedef typename std::vector<Pattern>::const_iterator const_iterator;
239
240 Batch (typename std::vector<Pattern>::const_iterator itBegin, typename std::vector<Pattern>::const_iterator itEnd)
241 : m_itBegin (itBegin)
242 , m_itEnd (itEnd)
243 {}
244
245 const_iterator begin () const { return m_itBegin; }
246 const_iterator end () const { return m_itEnd; }
247
248 size_t size () const { return std::distance (begin (), end ()); }
249
250 private:
251 const_iterator m_itBegin; ///< iterator denoting the beginning of the batch
252 const_iterator m_itEnd; ///< iterator denoting the end of the batch
253 };
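
Because a Batch is only a pair of const iterators into an existing std::vector<Pattern>, splitting a training set into mini-batches allocates no pattern copies. A sketch of such a split (editorial; "patterns" is an assumed, already filled std::vector<Pattern>):

    const std::ptrdiff_t batchSize = 10;
    std::vector<Batch> batches;
    for (auto it = patterns.cbegin (); it != patterns.cend (); )
    {
        // last batch may be shorter than batchSize
        auto itEnd = it + std::min (batchSize, std::distance (it, patterns.cend ()));
        batches.emplace_back (it, itEnd);
        it = itEnd;
    }
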
254
255
256
257
258
259
260 template <typename ItSource, typename ItWeight, typename ItTarget>
261 void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd);
262
263
264
265 template <typename ItSource, typename ItWeight, typename ItPrev>
266 void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd);
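
These two templates implement the fully connected propagation: applyWeights adds to every target node the weighted sum of all source nodes, and applyWeightsBackwards performs the transposed product to push the deltas back to the previous layer. In index notation it amounts to the following (editorial sketch; the flat weight ordering shown here is an assumption, the actual layout is defined in NeuralNet.icc):

    // forward:   target[t] += sum over s of weight[s*nTarget + t] * source[s]
    // backward:  prev[s]   += sum over t of weight[s*nTarget + t] * curr[t]
    for (size_t s = 0; s < nSource; ++s)
        for (size_t t = 0; t < nTarget; ++t)
            target[t] += weights[s * nTarget + t] * source[s];
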
267
268
269
270
271
272 template <typename ItValue, typename ItFunction>
273 void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction);
274
275
276 template <typename ItValue, typename ItFunction, typename ItInverseFunction, typename ItGradient>
277 void applyFunctions (ItValue itValue, ItValue itValueEnd, ItFunction itFunction, ItInverseFunction itInverseFunction, ItGradient itGradient);
278
279
280
281 template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
282 void update (ItSource itSource, ItSource itSourceEnd,
283 ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
284 ItTargetGradient itTargetGradientBegin,
285 ItGradient itGradient);
286
287
288
289 template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
290 void update (ItSource itSource, ItSource itSourceEnd,
291 ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
292 ItTargetGradient itTargetGradientBegin,
293 ItGradient itGradient,
294 ItWeight itWeight, double weightDecay);
295
296
297
298 // ----- signature of a minimizer -------------
299 // class Minimizer
300 // {
301 // public:
302
303 // template <typename Function, typename Variables, typename PassThrough>
304 // double operator() (Function& fnc, Variables& vars, PassThrough& passThrough)
305 // {
306 // // auto itVars = begin (vars);
307 // // auto itVarsEnd = end (vars);
308
309 // std::vector<double> myweights;
310 // std::vector<double> gradients;
311
312 // double value = fnc (passThrough, myweights);
313 // value = fnc (passThrough, myweights, gradients);
314 // return value;
315 // }
316 // };
317
318
319
320 ///< list all the minimizer types
321 enum MinimizerType
322 {
323 fSteepest ///< SGD
324 };
325
326
327
328
329
330 /*! \brief Steepest Gradient Descent algorithm (SGD)
331 *
332 * Implements a steepest gradient descent minimization algorithm
333 */
334 class Steepest
335 {
336 public:
337
338 size_t m_repetitions;
339
340
341 /*! \brief c'tor
342 *
343 * C'tor
344 *
345 * \param learningRate denotes the learning rate for the SGD algorithm
346 * \param momentum fraction of the velocity which is taken over from the last step
347 * \param repetitions re-compute the gradients each "repetitions" steps
348 */
349 Steepest (double learningRate = 1e-4,
350 double momentum = 0.5,
351 size_t repetitions = 10)
352 : m_repetitions (repetitions)
353 , m_alpha (learningRate)
354 , m_beta (momentum)
355 {}
356
357 /*! \brief operator to call the steepest gradient descent algorithm
358 *
359 * entry point to start the minimization procedure
360 *
361 * \param fitnessFunction (templated) function which has to be provided. This function is minimized
362 * \param weights (templated) a reference to a container of weights. The result of the minimization procedure
363 * is returned via this reference (needs to support std::begin and std::end)
364 * \param passThrough (templated) object which can hold any data which the fitness function needs. This object
365 * is not touched by the minimizer; This object is provided to the fitness function when
366 * called
367 */
368 template <typename Function, typename Weights, typename PassThrough>
369 double operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough);
370
371
372 double m_alpha; ///< internal parameter (learningRate)
373 double m_beta; ///< internal parameter (momentum)
374 std::vector<double> m_prevGradients; ///< vector remembers the gradients of the previous step
375
376 std::vector<double> m_localWeights; ///< local weights for reuse in thread.
377 std::vector<double> m_localGradients; ///< local gradients for reuse in thread.
378 };
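
The rule implemented by this minimizer is plain gradient descent with momentum: the previous step survives with fraction m_beta and the fresh gradient enters scaled by the learning rate m_alpha. Per parameter the update reads roughly as follows (editorial sketch, assuming m_prevGradients has been resized to the number of weights; the threaded implementation lives in NeuralNet.icc):

    // m_prevGradients acts as the "velocity", gradients holds dE/dw
    for (size_t i = 0; i < weights.size (); ++i)
    {
        m_prevGradients[i] = m_beta * m_prevGradients[i] - m_alpha * gradients[i];
        weights[i] += m_prevGradients[i];
    }
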
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397 template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
398 double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
399
400
401
402 template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
403 double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
404
405
406
407
408 template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
409 double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight);
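
The three functions above compute the training error and, at the same time, fill the delta range used to seed back-propagation. For one output y with truth t they correspond to the squared error (y - t)^2, the binary cross entropy -t*ln(y) - (1 - t)*ln(1 - y), and the multi-class cross entropy -sum_k t_k*ln(p_k) evaluated on softmax probabilities p_k, each scaled by patternWeight. A numeric sketch of the binary cross-entropy term (editorial):

    double y = 0.8, t = 1.0;  // predicted probability and truth
    double ce = -t * std::log (y) - (1.0 - t) * std::log (1.0 - y);  // ~0.223
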
410
411
412
413
414
415 template <typename ItWeight>
416 double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization);
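
weightDecay adds the regularization penalty to the error: a term proportional to sum |w| for L1 and to sum w^2 for L2, scaled by factorWeightDecay. A sketch of the L2 case (editorial; error, weights and factorWeightDecay are assumed inputs, and the 1/2 is one common normalization; the exact factor used here is defined in NeuralNet.icc):

    double penalty = 0.0;
    for (double w : weights)
        penalty += w * w;
    double regularizedError = error + factorWeightDecay * penalty / 2.0;
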
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431 /*! \brief LayerData holds the data of one layer
432 *
433 * LayerData holds the data of one layer, but not its layout
434 *
435 *
436 */
437 class LayerData
438 {
439 public:
440 typedef std::vector<double> container_type;
441
442 typedef container_type::iterator iterator_type;
443 typedef container_type::const_iterator const_iterator_type;
444
445 typedef std::vector<std::function<double(double)> > function_container_type;
446 typedef function_container_type::iterator function_iterator_type;
447 typedef function_container_type::const_iterator const_function_iterator_type;
448
449 typedef DropContainer::const_iterator const_dropout_iterator;
450
451 /*! \brief c'tor of LayerData
452 *
453 * C'tor of LayerData for the input layer
454 *
455 * \param itInputBegin iterator to the begin of a vector which holds the values of the nodes of the neural net
456 * \param itInputEnd iterator to the end of a vector which holds the values of the nodes of the neural net
457 * \param eModeOutput indicates a potential transformation of the output values before further computation
458 * DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
459 * output value (to create a probability); SOFTMAX applies a softmax transformation to all
460 * output values (mutually exclusive probability)
461 */
462 LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);
463
464
465 /*! \brief c'tor of LayerData
466 *
467 * C'tor of LayerData for the input layer
468 *
469 * \param inputSize input size of this layer
470 */
471 LayerData (size_t inputSize);
473
474
475 /*! \brief c'tor of LayerData
476 *
477 * C'tor of LayerData for all layers which are not the input layer; Used during the training of the DNN
478 *
479 * \param size size of the layer
480 * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
481 * \param itGradientBegin indicates the start of the gradients for this layer on the gradient vector
482 * \param activationFunction the activation function to be applied to the nodes of
483 * this layer
484 * \param inverseActivationFunction the "inverse" (derivative) of the activation function,
485 * needed for back-propagation
486 * \param eModeOutput indicates a potential transformation of the output values before further computation
487 * DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
488 * output value (to create a probability); SOFTMAX applies a softmax transformation to all
489 * output values (mutually exclusive probability)
490 */
491 LayerData (size_t size,
492 const_iterator_type itWeightBegin,
493 iterator_type itGradientBegin,
494 std::shared_ptr<std::function<double(double)>> activationFunction,
495 std::shared_ptr<std::function<double(double)>> inverseActivationFunction,
496 ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);
497
498 /*! \brief c'tor of LayerData
499 *
500 * C'tor of LayerData for all layers which are not the input layer; Used during the application of the DNN
501 *
502 * \param size size of the layer
503 * \param itWeightBegin indicates the start of the weights for this layer on the weight vector
504 * \param activationFunction the activation function to be applied to the nodes of
505 * this layer
506 * \param eModeOutput indicates a potential transformation of the output values before further computation
507 * DIRECT applies no further transformation; SIGMOID applies a sigmoid transformation to each
508 * output value (to create a probability); SOFTMAX applies a softmax transformation to all
509 * output values (mutually exclusive probability)
510 */
511 LayerData (size_t size, const_iterator_type itWeightBegin,
512 std::shared_ptr<std::function<double(double)>> activationFunction,
513 ModeOutputValues eModeOutput = ModeOutputValues::DIRECT);
514
515 /*! \brief copy c'tor of LayerData
516 *
517 *
518 */
519 LayerData (const LayerData& other)
520 : m_size (other.m_size)
521 , m_itInputBegin (other.m_itInputBegin)
522 , m_itInputEnd (other.m_itInputEnd)
523 , m_deltas (other.m_deltas)
524 , m_valueGradients (other.m_valueGradients)
525 , m_values (other.m_values)
526 , m_itDropOut (other.m_itDropOut)
527 , m_hasDropOut (other.m_hasDropOut)
528 , m_itConstWeightBegin (other.m_itConstWeightBegin)
529 , m_itGradientBegin (other.m_itGradientBegin)
530 , m_activationFunction (other.m_activationFunction)
531 , m_inverseActivationFunction (other.m_inverseActivationFunction)
532 , m_isInputLayer (other.m_isInputLayer)
533 , m_hasWeights (other.m_hasWeights)
534 , m_hasGradients (other.m_hasGradients)
535 , m_eModeOutput (other.m_eModeOutput)
536 {}
537
538 /*! \brief move c'tor of LayerData
539 *
540 *
541 */
542 LayerData (LayerData&& other)
543 : m_size (other.m_size)
544 , m_itInputBegin (other.m_itInputBegin)
545 , m_itInputEnd (other.m_itInputEnd)
546 , m_deltas (std::move(other.m_deltas))
547 , m_valueGradients (std::move(other.m_valueGradients))
548 , m_values (std::move(other.m_values))
549 , m_itDropOut (other.m_itDropOut)
550 , m_hasDropOut (other.m_hasDropOut)
551 , m_itConstWeightBegin (other.m_itConstWeightBegin)
552 , m_itGradientBegin (other.m_itGradientBegin)
553 , m_activationFunction (std::move(other.m_activationFunction))
554 , m_inverseActivationFunction (std::move(other.m_inverseActivationFunction))
555 , m_isInputLayer (other.m_isInputLayer)
556 , m_hasWeights (other.m_hasWeights)
557 , m_hasGradients (other.m_hasGradients)
558 , m_eModeOutput (other.m_eModeOutput)
559 {}
560
561
562 /*! \brief change the input iterators
563 *
564 *
565 * \param itInputBegin indicates the start of the input node vector
566 * \param itInputEnd indicates the end of the input node vector
567 *
568 */
569 void setInput (const_iterator_type itInputBegin, const_iterator_type itInputEnd)
570 {
571 m_isInputLayer = true;
572 m_itInputBegin = itInputBegin;
573 m_itInputEnd = itInputEnd;
574 }
575
576 /*! \brief clear the values and the deltas
577 *
578 *
579 */
580 void clear ()
581 {
582 m_values.assign (m_values.size (), 0.0);
583 m_deltas.assign (m_deltas.size (), 0.0);
584 }
585
586 const_iterator_type valuesBegin () const { return m_isInputLayer ? m_itInputBegin : begin (m_values); } ///< returns const iterator to the begin of the (node) values
587 const_iterator_type valuesEnd () const { return m_isInputLayer ? m_itInputEnd : end (m_values); } ///< returns const iterator to the end of the (node) values
588
589 iterator_type valuesBegin () { assert (!m_isInputLayer); return begin (m_values); } ///< returns iterator to the begin of the (node) values
590 iterator_type valuesEnd () { assert (!m_isInputLayer); return end (m_values); } ///< returns iterator to the end of the (node) values
591
592 ModeOutputValues outputMode () const { return m_eModeOutput; } ///< returns the output mode
593 container_type probabilities () const { return computeProbabilities (); } ///< computes the probabilities from the current node values and returns them
594
595 iterator_type deltasBegin () { return begin (m_deltas); } ///< returns iterator to the begin of the deltas (back-propagation)
596 iterator_type deltasEnd () { return end (m_deltas); } ///< returns iterator to the end of the deltas (back-propagation)
597
598 const_iterator_type deltasBegin () const { return begin (m_deltas); } ///< returns const iterator to the begin of the deltas (back-propagation)
599 const_iterator_type deltasEnd () const { return end (m_deltas); } ///< returns const iterator to the end of the deltas (back-propagation)
600
601 iterator_type valueGradientsBegin () { return begin (m_valueGradients); } ///< returns iterator to the begin of the gradients of the node values
602 iterator_type valueGradientsEnd () { return end (m_valueGradients); } ///< returns iterator to the end of the gradients of the node values
603
604 const_iterator_type valueGradientsBegin () const { return begin (m_valueGradients); } ///< returns const iterator to the begin of the gradients
605 const_iterator_type valueGradientsEnd () const { return end (m_valueGradients); } ///< returns const iterator to the end of the gradients
606
607 iterator_type gradientsBegin () { assert (m_hasGradients); return m_itGradientBegin; } ///< returns iterator to the begin of the gradients
608 const_iterator_type gradientsBegin () const { assert (m_hasGradients); return m_itGradientBegin; } ///< returns const iterator to the begin of the gradients
609 const_iterator_type weightsBegin () const { assert (m_hasWeights); return m_itConstWeightBegin; } ///< returns const iterator to the begin of the weights for this layer
610
611 std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; }
612 std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; }
613
614 /*! \brief set the drop-out info for this layer
615 *
616 */
617 template <typename Iterator>
618 void setDropOut (Iterator itDrop) { m_itDropOut = itDrop; m_hasDropOut = true; }
619
620 /*! \brief clear the drop-out-data for this layer
621 *
622 *
623 */
624 void clearDropOut () { m_hasDropOut = false; }
625
626 bool hasDropOut () const { return m_hasDropOut; } ///< has this layer drop-out turned on?
627 const_dropout_iterator dropOut () const { assert (m_hasDropOut); return m_itDropOut; } ///< return the begin of the drop-out information
628
629 size_t size () const { return m_size; } ///< return the size of the layer
630
631 private:
632
633 /*! \brief compute the probabilities from the node values
634 *
635 *
636 */
637 container_type computeProbabilities () const;
638
639 private:
640
641 size_t m_size; ///< layer size
642
643 const_iterator_type m_itInputBegin; ///< iterator to the first of the nodes in the input node vector
644 const_iterator_type m_itInputEnd; ///< iterator to the end of the nodes in the input node vector
645
646 std::vector<double> m_deltas; ///< stores the deltas for the DNN training
647 std::vector<double> m_valueGradients; ///< stores the gradients of the values (nodes)
648 std::vector<double> m_values; ///< stores the values of the nodes in this layer
649 const_dropout_iterator m_itDropOut; ///< iterator to a container indicating if the corresponding node is to be dropped
650 bool m_hasDropOut; ///< dropOut is turned on?
651
652 const_iterator_type m_itConstWeightBegin; ///< const iterator to the first weight of this layer in the weight vector
653 iterator_type m_itGradientBegin; ///< iterator to the first gradient of this layer in the gradient vector
654
655 std::shared_ptr<std::function<double(double)>> m_activationFunction; ///< activation function for this layer
656 std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< inverse activation function for this layer
657
658 bool m_isInputLayer; ///< is this layer an input layer
659 bool m_hasWeights; ///< does this layer have weights (it does not if it is the input layer)
660 bool m_hasGradients; ///< does this layer have gradients (only if in training mode)
661
662 ModeOutputValues m_eModeOutput; ///< stores the output mode (DIRECT, SIGMOID, SOFTMAX)
663
664 };
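
Note that LayerData is deliberately lightweight for the input layer: it owns no node values of its own but merely points into the pattern via the two input iterators, while hidden and output layers own their value, delta and gradient storage. A sketch of wiring up an input layer (editorial):

    std::vector<double> input = { 0.3, -1.2, 0.7 };
    const LayerData inputLayer (input.cbegin (), input.cend ());  // input-layer c'tor
    for (auto it = inputLayer.valuesBegin (); it != inputLayer.valuesEnd (); ++it)
        std::cout << *it << " ";   // reads straight from 'input', no copy was made
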
665
666
667
668
669
670 /*! \brief Layer defines the layout of a layer
671 *
672 * Layer defines the layout of a specific layer in the DNN
673 * Objects of this class don't hold the layer data itself (see class "LayerData")
674 *
675 */
676 class Layer
677 {
678 public:
679
680 /*! \brief c'tor for defining a Layer
681 *
682 *
683 * \param numNodes the number of nodes of this layer
684 * \param activationFunction the activation function to be used for the nodes of this layer
685 * \param eModeOutputValues the mode of the output values (e.g. DIRECT, SIGMOID, SOFTMAX)
686 */
687 Layer (size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues = ModeOutputValues::DIRECT);
688
689 ModeOutputValues modeOutputValues () const { return m_eModeOutputValues; } ///< get the mode-output-value (direct, probabilities)
690 void modeOutputValues (ModeOutputValues eModeOutputValues) { m_eModeOutputValues = eModeOutputValues; } ///< set the mode-output-value
691
692 size_t numNodes () const { return m_numNodes; } ///< return the number of nodes of this layer
693 size_t numWeights (size_t numInputNodes) const { return numInputNodes * numNodes (); } ///< return the number of weights for this layer (fully connected)
694
695 std::shared_ptr<std::function<double(double)>> activationFunction () const { return m_activationFunction; } ///< fetch the activation function for this layer
696 std::shared_ptr<std::function<double(double)>> inverseActivationFunction () const { return m_inverseActivationFunction; } ///< fetch the inverse activation function for this layer
697
698 EnumFunction activationFunctionType () const { return m_activationFunctionType; } ///< get the activation function type for this layer
699
700 private:
701
702
703 std::shared_ptr<std::function<double(double)>> m_activationFunction; ///< stores the activation function
704 std::shared_ptr<std::function<double(double)>> m_inverseActivationFunction; ///< stores the inverse activation function
705
706
707 size_t m_numNodes; ///< number of nodes of this layer
708
709 ModeOutputValues m_eModeOutputValues; ///< do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned as such
710 EnumFunction m_activationFunctionType; ///< type of the activation function of this layer
711
712 friend class Net;
713 };
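
A Layer thus fixes only the layout: node count, activation function and output mode. The weights themselves live in one flat vector owned by the caller, which is why numWeights needs the node count of the preceding layer. Sketch (editorial):

    Layer hidden (8, EnumFunction::TANH);                               // 8 tanh nodes
    Layer output (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID);  // 1 sigmoid output
    size_t nW = hidden.numWeights (4);  // fully connected to a 4-node layer: 4*8 = 32
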
714
715
716
717
718
719 template <typename LAYERDATA>
720 void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData);
721
722
723 template <typename LAYERDATA>
724 void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData);
725
726
727 template <typename LAYERDATA>
728 void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double weightDecay, EnumRegularization regularization);
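
Together these three templates make up one training step on a batch: forward propagates node values layer by layer, backward pushes the deltas in the opposite direction, and update turns values and deltas into weight gradients, including the regularization term. Schematically (editorial sketch; layerData is assumed to be a std::vector<LayerData> for one pattern, weightDecay and regularization are assumed settings):

    for (size_t l = 0; l + 1 < layerData.size (); ++l)
        forward (layerData[l], layerData[l+1]);
    // ... evaluate the error function and fill the deltas of the last layer ...
    for (size_t l = layerData.size () - 1; l > 0; --l)
        backward (layerData[l-1], layerData[l]);
    for (size_t l = 0; l + 1 < layerData.size (); ++l)
        update (layerData[l], layerData[l+1], weightDecay, regularization);
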
729
730
731
732 /*! \brief Settings for the training of the neural net
733 *
734 *
735 */
736 class Settings
737 {
738 public:
739
740 /*! \brief c'tor
741 *
742 *
743 */
744 Settings (TString name,
745 size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
746 double _factorWeightDecay = 1e-5, TMVA::DNN::EnumRegularization _regularization = TMVA::DNN::EnumRegularization::NONE,
747 MinimizerType _eMinimizerType = MinimizerType::fSteepest,
748 double _learningRate = 1e-5, double _momentum = 0.3,
749 int _repetitions = 3,
750 bool _multithreading = true);
751
752 /*! \brief d'tor
753 *
754 *
755 */
756 virtual ~Settings ();
757
758
759 /*! \brief set the drop-out configuration (layer-wise)
760 *
761 * \param begin begin of an array or vector denoting the drop-out probabilities for each layer
762 * \param end end of an array or vector denoting the drop-out probabilities for each layer
763 * \param _dropRepetitions denotes after how many repetitions the drop-out setting (which nodes are dropped out exactly) is changed
764 */
765 template <typename Iterator>
766 void setDropOut (Iterator begin, Iterator end, size_t _dropRepetitions) { m_dropOut.assign (begin, end); m_dropRepetitions = _dropRepetitions; }
767
768 size_t dropRepetitions () const { return m_dropRepetitions; }
769 const std::vector<double>& dropFractions () const { return m_dropOut; }
770
771 void setMonitoring (std::shared_ptr<Monitoring> ptrMonitoring) { fMonitoring = ptrMonitoring; } ///< prepared for monitoring
772
773 size_t convergenceSteps () const { return m_convergenceSteps; } ///< how many steps until training is deemed to have converged
774 size_t batchSize () const { return m_batchSize; } ///< mini-batch size
775 size_t testRepetitions () const { return m_testRepetitions; } ///< how often is the test data tested
776 double factorWeightDecay () const { return m_factorWeightDecay; } ///< get the weight-decay factor
777
778 double learningRate () const { return fLearningRate; } ///< get the learning rate
779 double momentum () const { return fMomentum; } ///< get the momentum (e.g. for SGD)
780 int repetitions () const { return fRepetitions; } ///< how many steps have to be gone until the batch is changed
781 MinimizerType minimizerType () const { return fMinimizerType; } ///< which minimizer shall be used (e.g. SGD)
782
783
784
785
786
787
788 virtual void testSample (double /*error*/, double /*output*/, double /*target*/, double /*weight*/) {} ///< virtual function to be used for monitoring (callback)
789 virtual void startTrainCycle () ///< callback for monitoring and logging
790 {
791 m_convergenceCount = 0;
792 m_maxConvergenceCount = 0;
793 m_minError = 1e10;
794 }
795 virtual void endTrainCycle (double /*error*/) {} ///< callback for monitoring and logging
796
797 virtual void setProgressLimits (double minProgress = 0, double maxProgress = 100) ///< for monitoring and logging (set the current "progress" limits for the display of the progress)
798 {
799 m_minProgress = minProgress;
800 m_maxProgress = maxProgress;
801 }
802 virtual void startTraining () ///< start drawing the progress bar
803 {
804 m_timer.DrawProgressBar (0, "");
805 }
806 virtual void cycle (double progress, TString text) ///< advance on the progress bar
807 {
808 m_timer.DrawProgressBar (Int_t(progress), text);
809 }
810
811 virtual void startTestCycle () {} ///< callback for monitoring and logging
812 virtual void endTestCycle () {} ///< callback for monitoring and logging
813 virtual void testIteration () {} ///< callback for monitoring and logging
814 virtual void drawSample (const std::vector<double>& /*input*/, const std::vector<double>& /* output */, const std::vector<double>& /* target */, double /* patternWeight */) {} ///< callback for monitoring and logging
815
816 virtual void computeResult (const Net& /* net */, std::vector<double>& /* weights */) {} ///< callback for monitoring and logging
817
818 virtual bool hasConverged (double testError); ///< has this training converged already?
819
820 EnumRegularization regularization () const { return m_regularization; } ///< returns the regularization mode applied to the DNN
821
822 bool useMultithreading () const { return m_useMultithreading; } ///< is multithreading turned on?
823
824
825 void pads (int numPads) { if (fMonitoring) fMonitoring->pads (numPads); } ///< preparation for monitoring
826 void create (std::string histoName, int bins, double min, double max) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max); } ///< for monitoring
827 void create (std::string histoName, int bins, double min, double max, int bins2, double min2, double max2) { if (fMonitoring) fMonitoring->create (histoName, bins, min, max, bins2, min2, max2); } ///< for monitoring
828 void addPoint (std::string histoName, double x) { if (fMonitoring) fMonitoring->addPoint (histoName, x); } ///< for monitoring
829 void addPoint (std::string histoName, double x, double y) {if (fMonitoring) fMonitoring->addPoint (histoName, x, y); } ///< for monitoring
830 void plot (std::string histoName, std::string options, int pad, EColor color) { if (fMonitoring) fMonitoring->plot (histoName, options, pad, color); } ///< for monitoring
831 void clear (std::string histoName) { if (fMonitoring) fMonitoring->clear (histoName); } ///< for monitoring
832 bool exists (std::string histoName) { if (fMonitoring) return fMonitoring->exists (histoName); return false; } ///< for monitoring
833
834 size_t convergenceCount () const { return m_convergenceCount; } ///< returns the current convergence count
835 size_t maxConvergenceCount () const { return m_maxConvergenceCount; } ///< returns the max convergence count so far
836 double minError () const { return m_minError; } ///< returns the smallest error so far
837
838 public:
839 Timer m_timer; ///< timer for monitoring
840 double m_minProgress; ///< current limits for the progress bar
841 double m_maxProgress; ///< current limits for the progress bar
842
843
844 size_t m_convergenceSteps; ///< number of steps without improvement to consider the DNN to have converged
845 size_t m_batchSize; ///< mini-batch size
846 size_t m_testRepetitions;
847 double m_factorWeightDecay;
848
849 size_t count_E;
850 size_t count_dE;
851 size_t count_mb_E;
852 size_t count_mb_dE;
853
854 EnumRegularization m_regularization;
855
856 size_t m_dropRepetitions;
857 std::vector<double> m_dropOut;
858
859 double fLearningRate;
860 double fMomentum;
861 int fRepetitions;
862 MinimizerType fMinimizerType;
863
864 size_t m_convergenceCount;
865 size_t m_maxConvergenceCount;
866 double m_minError;
867
868
869 protected:
870 bool m_useMultithreading;
871
872 std::shared_ptr<Monitoring> fMonitoring;
873 };
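
Every hyper-parameter of Settings has a default, so a configuration only needs to spell out the deviations. Sketch (editorial):

    Settings settings ("run1",
                       /*_convergenceSteps*/  20,
                       /*_batchSize*/         64,
                       /*_testRepetitions*/    7,
                       /*_factorWeightDecay*/ 1e-4,
                       TMVA::DNN::EnumRegularization::L2);
    double dropFractions[] = { 0.0, 0.5, 0.5 };   // one drop-out fraction per layer
    settings.setDropOut (std::begin (dropFractions), std::end (dropFractions),
                         /*_dropRepetitions*/ 10);
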
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897 /*! \brief Settings for classification
898 *
899 * contains additional settings if the DNN problem is classification
900 */
901 class ClassificationSettings : public Settings
902 {
903 public:
904 /*! \brief c'tor
905 *
906 *
907 */
908 ClassificationSettings (TString name,
909 size_t _convergenceSteps = 15, size_t _batchSize = 10, size_t _testRepetitions = 7,
910 double _factorWeightDecay = 1e-5, EnumRegularization _regularization = EnumRegularization::NONE,
911 size_t _scaleToNumEvents = 0, MinimizerType _eMinimizerType = MinimizerType::fSteepest,
912 double _learningRate = 1e-5, double _momentum = 0.3, int _repetitions = 3,
913 bool _useMultithreading = true)
914 : Settings (name, _convergenceSteps, _batchSize, _testRepetitions, _factorWeightDecay,
915 _regularization, _eMinimizerType, _learningRate, _momentum, _repetitions, _useMultithreading)
916 , m_ams ()
917 , m_sumOfSigWeights (0)
918 , m_sumOfBkgWeights (0)
919 , m_scaleToNumEvents (_scaleToNumEvents)
920 , m_cutValue (10.0)
921 , m_pResultPatternContainer (nullptr)
922 , m_fileNameResult ()
923 , m_fileNameNetConfig ()
924 {
925 }
926
927 /*! \brief d'tor
928 *
929 *
930 */
931 virtual ~ClassificationSettings ()
932 {
933 }
934
935 void startTrainCycle ();
936 void endTrainCycle (double /*error*/);
937 void testIteration () { if (fMonitoring) fMonitoring->ProcessEvents (); }
938
939
940 /* void createHistograms () */
941 /* { */
942 /* std::cout << "is hist ROC existing?" << std::endl; */
943 /* if (m_histROC) */
944 /* { */
945 /* std::cout << "--> yes" << std::endl; */
946 /* fMonitoring->ProcessEvents (); */
947 /* return; */
948 /* } */
949
950 /* std::cout << "create histograms" << std::endl; */
951 /* TCanvas* canvas = fMonitoring->GetCanvas (); */
952 /* if (canvas) */
953 /* { */
954 /* std::cout << "canvas divide" << std::endl; */
955 /* canvas->cd (); */
956 /* canvas->Divide (2,2); */
957 /* } */
958 /* if (!m_histROC) */
959 /* { */
960 /* m_histROC = new TH2F ("ROC","ROC", 1000, 0, 1.0, 1000, 0, 1.0); m_histROC->SetDirectory (0); */
961 /* m_histROC->SetLineColor (kBlue); */
962 /* } */
963 /* if (!m_histSignificance) */
964 /* { */
965 /* m_histSignificance = new TH2F ("Significance", "Significance", 1000, 0,1.0, 5, 0.0, 2.0); */
966 /* m_histSignificance->SetDirectory (0); */
967 /* m_histSignificance->SetBit (TH1::kCanRebin); */
968 /* m_histROC->SetLineColor (kRed); */
969 /* } */
970 /* if (!m_histError) */
971 /* { */
972 /* m_histError = new TH1F ("Error", "Error", 100, 0, 100); */
973 /* m_histError->SetDirectory (0); */
974 /* m_histError->SetBit (TH1::kCanRebin); */
975 /* m_histROC->SetLineColor (kGreen); */
976 /* } */
977 /* if (!m_histOutputSignal) */
978 /* { */
979 /* m_histOutputSignal = new TH1F ("Signal", "Signal", 100, 0, 1.0); */
980 /* m_histOutputSignal->SetDirectory (0); */
981 /* m_histOutputSignal->SetBit (TH1::kCanRebin); */
982 /* } */
983 /* if (!m_histOutputBackground) */
984 /* { */
985 /* m_histOutputBackground = new TH1F ("Background", "Background", 100, 0, 1.0); */
986 /* m_histOutputBackground->SetDirectory (0); */
987 /* m_histOutputBackground->SetBit (TH1::kCanRebin); */
988 /* } */
989
990 /* fMonitoring->ProcessEvents (); */
991 /* } */
992
993 void testSample (double error, double output, double target, double weight);
994
995 virtual void startTestCycle ();
996 virtual void endTestCycle ();
997
998
999 void setWeightSums (double sumOfSigWeights, double sumOfBkgWeights);
1000 void setResultComputation (std::string _fileNameNetConfig, std::string _fileNameResult, std::vector<Pattern>* _resultPatternContainer);
1001
1002 std::vector<double> m_input;
1003 std::vector<double> m_output;
1004 std::vector<double> m_targets;
1005 std::vector<double> m_weights;
1006
1007 std::vector<double> m_ams;
1008 std::vector<double> m_significances;
1009
1010
1011 double m_sumOfSigWeights;
1012 double m_sumOfBkgWeights;
1013 size_t m_scaleToNumEvents;
1014
1015 double m_cutValue;
1016 std::vector<Pattern>* m_pResultPatternContainer;
1017 std::string m_fileNameResult;
1018 std::string m_fileNameNetConfig;
1019
1020
1021 /* TH2F* m_histROC; */
1022 /* TH2F* m_histSignificance; */
1023
1024 /* TH1F* m_histError; */
1025 /* TH1F* m_histOutputSignal; */
1026 /* TH1F* m_histOutputBackground; */
1027 };
1028
1029
1030
1031
1032
1033
1034
1035 ///< used to distinguish between different function signatures
1036 enum class ModeOutput
1037 {
1038 FETCH
1039 };
1040
1041 /*! \brief error functions to be chosen from
1042 *
1043 *
1044 */
1045 enum class ModeErrorFunction
1046 {
1047 SUMOFSQUARES = 'S',
1048 CROSSENTROPY = 'C',
1049 CROSSENTROPY_MUTUALEXCLUSIVE = 'M'
1050 };
1051
1052 /*! \brief weight initialization strategies to be chosen from
1053 *
1054 *
1055 */
1056 enum class WeightInitializationStrategy
1057 {
1058 XAVIER, TEST, LAYERSIZE, XAVIERUNIFORM
1059 };
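
The XAVIER strategy draws each initial weight from a zero-mean gaussian whose width shrinks with the fan-in of the receiving node, sigma = sqrt(1/nIn), which keeps the variance of the activations roughly constant from layer to layer. Using gaussDouble declared above, one weight would be drawn like this (editorial sketch; the exact scaling applied by initializeWeights is defined in NeuralNet.icc):

    size_t nIn   = 10;                        // number of nodes feeding into the node
    double sigma = std::sqrt (1.0 / nIn);
    double w     = gaussDouble (0.0, sigma);  // one Xavier-initialized weight
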
1060
1061
1062
1063 /*! \brief neural net
1064 *
1065 * holds the structure of all layers and some data for the whole net
1066 * does not know the layer data though (i.e. values of the nodes and weights)
1067 */
1068 class Net
1069 {
1070 public:
1071
1072 typedef std::vector<double> container_type;
1073 typedef container_type::iterator iterator_type;
1074 typedef std::pair<iterator_type,iterator_type> begin_end_type;
1075
1076
1077 /*! \brief c'tor
1078 *
1079 *
1080 */
1081 Net ()
1082 : m_eErrorFunction (ModeErrorFunction::SUMOFSQUARES)
1083 , m_sizeInput (0)
1084 , m_layers ()
1085 {
1086 }
1087
1088 /*! \brief copy c'tor
1089 *
1090 *
1091 */
1092 Net (const Net& other)
1093 : m_eErrorFunction (other.m_eErrorFunction)
1094 , m_sizeInput (other.m_sizeInput)
1095 , m_layers (other.m_layers)
1096 {
1097 }
1098
1099 void setInputSize (size_t sizeInput) { m_sizeInput = sizeInput; } ///< set the input size of the DNN
1100 void setOutputSize (size_t sizeOutput) { m_sizeOutput = sizeOutput; } ///< set the output size of the DNN
1101 void addLayer (Layer& layer) { m_layers.push_back (layer); } ///< add a layer (layout)
1102 void addLayer (Layer&& layer) { m_layers.push_back (std::move (layer)); }
1103 void setErrorFunction (ModeErrorFunction eErrorFunction) { m_eErrorFunction = eErrorFunction; } ///< which error function is to be used
1104
1105 size_t inputSize () const { return m_sizeInput; } ///< input size of the DNN
1106 size_t outputSize () const { return m_sizeOutput; } ///< output size of the DNN
1107
1108 /*! \brief scale the weights according to the drop-out probabilities (inverse = true reverts the scaling)
1109 *
1110 *
1111 */
1112 template <typename WeightsType, typename DropProbabilities>
1113 void dropOutWeightFactor (WeightsType& weights,
1114 const DropProbabilities& drops,
1115 bool inverse = false);
1116
1117 /*! \brief start the training
1118 *
1119 * \param weights weight vector
1120 * \param trainPattern training pattern
1121 * \param testPattern test pattern
1122 * \param minimizer use this minimizer for training (e.g. SGD)
1123 * \param settings settings used for this training run
1124 */
1125 template <typename Minimizer>
1126 double train (std::vector<double>& weights,
1127 std::vector<Pattern>& trainPattern,
1128 const std::vector<Pattern>& testPattern,
1129 Minimizer& minimizer,
1130 Settings& settings);
1131
1132 /*! \brief pre-training for future use
1133 *
1134 *
1135 */
1136 template <typename Minimizer>
1137 void preTrain (std::vector<double>& weights,
1138 std::vector<Pattern>& trainPattern,
1139 const std::vector<Pattern>& testPattern,
1140 Minimizer& minimizer, Settings& settings);
1141
1142
1143 /*! \brief executes one training cycle
1144 *
1145 * \param minimizer the minimizer to be used
1146 * \param weights the weight vector to be used
1147 * \param itPatternBegin the begin of the patterns to be trained with
1148 * \param itPatternEnd the end of the patterns to be trained with
1149 * \param settings the settings for the training
1150 * \param dropContainer the configuration for DNN drop-out
1151 */
1152 template <typename Iterator, typename Minimizer>
1153 inline double trainCycle (Minimizer& minimizer, std::vector<double>& weights,
1154 Iterator itPatternBegin, Iterator itPatternEnd,
1155 Settings& settings,
1156 DropContainer& dropContainer);
1157
1158 size_t numWeights (size_t trainingStartLayer = 0) const; ///< returns the number of weights in this net
1159 size_t numNodes (size_t trainingStartLayer = 0) const; ///< returns the number of nodes in this net
1160
1161 template <typename Weights>
1162 std::vector<double> compute (const std::vector<double>& input, const Weights& weights) const; ///< compute the net with the given input and the given weights
1163
1164 template <typename Weights, typename PassThrough>
1165 double operator() (PassThrough& settingsAndBatch, const Weights& weights) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients
1166
1167 template <typename Weights, typename PassThrough, typename OutContainer>
1168 double operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput eFetch, OutContainer& outputContainer) const; ///< execute computation of the DNN for one mini-batch; helper function
1169
1170 template <typename Weights, typename Gradients, typename PassThrough>
1171 double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const; ///< execute computation of the DNN for one mini-batch (used by the minimizer); returns gradients as well
1172
1173 template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
1174 double operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const;
1175
1176
1177 template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
1178 std::vector<std::vector<LayerData>> prepareLayerData (LayerContainer& layers,
1179 Batch& batch,
1180 const DropContainer& dropContainer,
1181 ItWeight itWeightBegin,
1182 ItWeight itWeightEnd,
1183 ItGradient itGradientBegin,
1184 ItGradient itGradientEnd,
1185 size_t& totalNumWeights) const;
1186
1187 template <typename LayerContainer>
1188 void forwardPattern (const LayerContainer& _layers,
1189 std::vector<LayerData>& layerData) const;
1190
1191
1192 template <typename LayerContainer, typename LayerPatternContainer>
1193 void forwardBatch (const LayerContainer& _layers,
1194 LayerPatternContainer& layerPatternData,
1195 std::vector<double>& valuesMean,
1196 std::vector<double>& valuesStdDev,
1197 size_t trainFromLayer) const;
1198
1199 template <typename OutputContainer>
1200 void fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const;
1201
1202 template <typename OutputContainer>
1203 void fetchOutput (const std::vector<LayerData>& layerPatternData, OutputContainer& outputContainer) const;
1204
1205
1206 template <typename ItWeight>
1207 std::tuple</*sumError*/double,/*sumWeights*/double> computeError (const Settings& settings,
1208 std::vector<LayerData>& lastLayerData,
1209 Batch& batch,
1210 ItWeight itWeightBegin,
1211 ItWeight itWeightEnd) const;
1212
1213 template <typename Settings>
1214 void backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
1215 const Settings& settings,
1216 size_t trainFromLayer,
1217 size_t totalNumWeights) const;
1218
1219
1220
1221 /*! \brief main NN computation function
1222 *
1223 *
1224 */
1225 template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
1226 double forward_backward (LayerContainer& layers, PassThrough& settingsAndBatch,
1227 ItWeight itWeightBegin, ItWeight itWeightEnd,
1228 ItGradient itGradientBegin, ItGradient itGradientEnd,
1229 size_t trainFromLayer,
1230 OutContainer& outputContainer, bool fetchOutput) const;
1231
1232
1233
1234 double E ();
1235 void dE ();
1236
1237
1238 /*! \brief computes the error of the DNN
1239 *
1240 *
1241 */
1242 template <typename Container, typename ItWeight>
1243 double errorFunction (LayerData& layerData,
1244 Container truth,
1245 ItWeight itWeight,
1246 ItWeight itWeightEnd,
1247 double patternWeight,
1248 double factorWeightDecay,
1249 EnumRegularization eRegularization) const;
1250
1251
1252 const std::vector<Layer>& layers () const { return m_layers; } ///< returns the layers (structure)
1253 std::vector<Layer>& layers () { return m_layers; } ///< returns the layers (structure)
1254
1255 void removeLayer () { m_layers.pop_back (); } ///< remove one layer
1256
1257
1258 void clear () ///< remove all layers and reset the error function
1259 {
1260 m_layers.clear ();
1261 m_eErrorFunction = ModeErrorFunction::SUMOFSQUARES;
1262 }
1263
1264
1265 template <typename OutIterator>
1266 void initializeWeights (WeightInitializationStrategy eInitStrategy,
1267 OutIterator itWeight); ///< initialize the weights with the given strategy
1268
1269 protected:
1270
1271 void fillDropContainer (DropContainer& dropContainer, double dropFraction, size_t numNodes) const; ///< prepare the drop-out-container (select the nodes which are to be dropped out)
1272
1273
1274 private:
1275
1276 ModeErrorFunction m_eErrorFunction; ///< denotes the error function
1277 size_t m_sizeInput; ///< input size of this DNN
1278 size_t m_sizeOutput; ///< output size of this DNN
1279 std::vector<Layer> m_layers; ///< layer-structure-data
1280
1281 protected:
1282 // variables for JsMVA (interactive training in jupyter notebook)
1283 IPythonInteractive *fInteractive = nullptr;
1284 bool * fExitFromTraining = nullptr;
1285 UInt_t *fIPyMaxIter = nullptr, *fIPyCurrentIter = nullptr;
1286
1287 public:
1288
1289 // setup ipython interactive variables
1290 void SetIpythonInteractive (IPythonInteractive* fI, bool* fE, UInt_t* M, UInt_t* C) {
1291 fInteractive = fI;
1292 fExitFromTraining = fE;
1293 fIPyMaxIter = M;
1294 fIPyCurrentIter = C;
1295 }
1296 };
1297
1298
1299
1300
1301 typedef std::tuple<Settings&, Batch&, DropContainer&> pass_through_type;
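
Putting it together, a complete training run builds the layout, draws the initial flat weight vector, and hands weights, patterns, minimizer and settings to Net::train. A condensed end-to-end sketch (editorial; trainPatterns and testPatterns are assumed to be filled std::vector<Pattern>):

    using namespace TMVA::DNN;

    Net net;
    net.setInputSize (4);
    net.setOutputSize (1);
    net.addLayer (Layer (8, EnumFunction::TANH));
    net.addLayer (Layer (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID));
    net.setErrorFunction (ModeErrorFunction::CROSSENTROPY);

    std::vector<double> weights;
    weights.reserve (net.numWeights ());
    net.initializeWeights (WeightInitializationStrategy::XAVIER,
                           std::back_inserter (weights));

    Steepest minimizer (/*learningRate*/ 1e-4, /*momentum*/ 0.5, /*repetitions*/ 10);
    Settings settings ("demo");
    double testError = net.train (weights, trainPatterns, testPatterns,
                                  minimizer, settings);
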
1302
1303
1304
1305
1306
1307
1308
1309 } // namespace DNN
1310} // namespace TMVA
1311
1312
1313 // include the implementations (in header file, because they are templated)
1314 #include "TMVA/NeuralNet.icc"
1315
1316 #endif
1317
#define R(a, b, c, d, e, f, g, h, i)
Definition: RSha256.hxx:110
#define e(i)
Definition: RSha256.hxx:103
#define NONE
Definition: Rotated.cxx:52
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
EColor
Definition: Rtypes.h:63
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
double sqrt(double)
Double_t(* Function)(Double_t)
Definition: Functor.C:4
The Batch class encapsulates one mini-batch.
Definition: NeuralNet.h:236
const_iterator m_itEnd
iterator denoting the end of the batch
Definition: NeuralNet.h:252
const_iterator begin() const
Definition: NeuralNet.h:245
const_iterator end() const
Definition: NeuralNet.h:246
Batch(typename std::vector< Pattern >::const_iterator itBegin, typename std::vector< Pattern >::const_iterator itEnd)
Definition: NeuralNet.h:240
size_t size() const
Definition: NeuralNet.h:248
std::vector< Pattern >::const_iterator const_iterator
Definition: NeuralNet.h:238
const_iterator m_itBegin
iterator denoting the beginning of the batch
Definition: NeuralNet.h:251
Settings for classificationused to distinguish between different function signatures.
Definition: NeuralNet.h:902
void startTrainCycle()
action to be done when the training cycle is started (e.g.
Definition: NeuralNet.cxx:281
void testIteration()
callback for monitoring and loggging
Definition: NeuralNet.h:937
void endTrainCycle(double)
action to be done when the training cycle is ended (e.g.
Definition: NeuralNet.cxx:296
virtual void endTestCycle()
action to be done when the training cycle is ended (e.g.
Definition: NeuralNet.cxx:326
std::vector< Pattern > * m_pResultPatternContainer
Definition: NeuralNet.h:1016
void setResultComputation(std::string _fileNameNetConfig, std::string _fileNameResult, std::vector< Pattern > *_resultPatternContainer)
preparation for monitoring output
Definition: NeuralNet.cxx:523
ClassificationSettings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, EnumRegularization _regularization=EnumRegularization::NONE, size_t _scaleToNumEvents=0, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _useMultithreading=true)
c'tor
Definition: NeuralNet.h:908
std::vector< double > m_input
Definition: NeuralNet.h:1002
std::vector< double > m_significances
Definition: NeuralNet.h:1008
std::vector< double > m_weights
Definition: NeuralNet.h:1005
virtual ~ClassificationSettings()
d'tor
Definition: NeuralNet.h:931
std::vector< double > m_targets
Definition: NeuralNet.h:1004
void testSample(double error, double output, double target, double weight)
action to be done after the computation of a test sample (e.g.
Definition: NeuralNet.cxx:304
virtual void startTestCycle()
action to be done when the test cycle is started (e.g.
Definition: NeuralNet.cxx:316
void setWeightSums(double sumOfSigWeights, double sumOfBkgWeights)
set the weight sums to be scaled to (preparations for monitoring output)
Definition: NeuralNet.cxx:515
std::vector< double > m_ams
Definition: NeuralNet.h:1007
std::vector< double > m_output
Definition: NeuralNet.h:1003
LayerData holds the data of one layer.
Definition: NeuralNet.h:438
const_iterator_type m_itInputBegin
iterator to the first of the nodes in the input node vector
Definition: NeuralNet.h:643
const_iterator_type deltasBegin() const
returns const iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:598
iterator_type valuesBegin()
returns iterator to the begin of the (node) values
Definition: NeuralNet.h:589
const_iterator_type valuesEnd() const
returns iterator to the end of the (node) values
Definition: NeuralNet.h:587
bool m_hasGradients
does this layer have gradients (only if in training mode)
Definition: NeuralNet.h:660
std::vector< double > m_deltas
stores the deltas for the DNN training
Definition: NeuralNet.h:646
container_type::iterator iterator_type
Definition: NeuralNet.h:442
LayerData(const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput=ModeOutputValues::DIRECT)
c'tor of LayerData
Definition: NeuralNet.cxx:81
void setDropOut(Iterator itDrop)
set the drop-out info for this layer
Definition: NeuralNet.h:618
void setInput(const_iterator_type itInputBegin, const_iterator_type itInputEnd)
change the input iterators
Definition: NeuralNet.h:569
std::vector< std::function< double(double)> > function_container_type
Definition: NeuralNet.h:445
iterator_type valuesEnd()
returns iterator to the end of the (node) values
Definition: NeuralNet.h:590
const_dropout_iterator m_itDropOut
iterator to a container indicating if the corresponding node is to be dropped
Definition: NeuralNet.h:649
iterator_type valueGradientsBegin()
returns iterator to the begin of the gradients of the node values
Definition: NeuralNet.h:601
iterator_type gradientsBegin()
returns iterator to the begin of the gradients
Definition: NeuralNet.h:607
iterator_type deltasBegin()
returns iterator to the begin of the deltas (back-propagation)
Definition: NeuralNet.h:595
bool m_hasWeights
does this layer have weights (it does not if it is the input layer)
Definition: NeuralNet.h:659
const_dropout_iterator dropOut() const
return the begin of the drop-out information
Definition: NeuralNet.h:627
LayerData(LayerData &&other)
move c'tor of LayerData
Definition: NeuralNet.h:542
std::vector< double > container_type
Definition: NeuralNet.h:440
size_t size() const
return the size of the layer
Definition: NeuralNet.h:629
const_iterator_type weightsBegin() const
returns const iterator to the begin of the weights for this layer
Definition: NeuralNet.h:609
function_container_type::const_iterator const_function_iterator_type
Definition: NeuralNet.h:447
LayerData(const LayerData &other)
copy c'tor of LayerData
Definition: NeuralNet.h:519
function_container_type::iterator function_iterator_type
Definition: NeuralNet.h:446
std::vector< double > m_values
stores the values of the nodes in this layer
Definition: NeuralNet.h:648
const_iterator_type m_itInputEnd
iterator to the end of the nodes in the input node vector
Definition: NeuralNet.h:644
container_type::const_iterator const_iterator_type
Definition: NeuralNet.h:443
ModeOutputValues outputMode() const
returns the output mode
Definition: NeuralNet.h:592
iterator_type m_itGradientBegin
iterator to the first gradient of this layer in the gradient vector
Definition: NeuralNet.h:653
const_iterator_type gradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:608
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
Definition: NeuralNet.h:612
iterator_type deltasEnd()
returns iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:596
std::vector< double > m_valueGradients
stores the gradients of the values (nodes)
Definition: NeuralNet.h:647
const_iterator_type m_itConstWeightBegin
const iterator to the first weight of this layer in the weight vector
Definition: NeuralNet.h:652
iterator_type valueGradientsEnd()
returns iterator to the end of the gradients of the node values
Definition: NeuralNet.h:602
void clear()
clear the values and the deltas
Definition: NeuralNet.h:580
std::shared_ptr< std::function< double(double)> > activationFunction() const
Definition: NeuralNet.h:611
container_type computeProbabilities() const
compute the probabilities from the node values
Definition: NeuralNet.cxx:140
const_iterator_type deltasEnd() const
returns const iterator to the end of the deltas (back-propagation)
Definition: NeuralNet.h:599
bool m_hasDropOut
dropOut is turned on?
Definition: NeuralNet.h:650
bool m_isInputLayer
is this layer an input layer
Definition: NeuralNet.h:658
bool hasDropOut() const
has this layer drop-out turned on?
Definition: NeuralNet.h:626
const_iterator_type valueGradientsBegin() const
returns const iterator to the begin of the gradients
Definition: NeuralNet.h:604
const_iterator_type valueGradientsEnd() const
returns const iterator to the end of the gradients
Definition: NeuralNet.h:605
container_type probabilities() const
computes the probabilities from the current node values and returns them
Definition: NeuralNet.h:593
void clearDropOut()
clear the drop-out-data for this layer
Definition: NeuralNet.h:624
ModeOutputValues m_eModeOutput
stores the output mode (DIRECT, SIGMOID, SOFTMAX)
Definition: NeuralNet.h:662
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
inverse activation function for this layer
Definition: NeuralNet.h:656
DropContainer::const_iterator const_dropout_iterator
Definition: NeuralNet.h:449
const_iterator_type valuesBegin() const
returns const iterator to the begin of the (node) values
Definition: NeuralNet.h:586
std::shared_ptr< std::function< double(double)> > m_activationFunction
activation function for this layer
Definition: NeuralNet.h:655
Layer defines the layout of a layer.
Definition: NeuralNet.h:677
void modeOutputValues(ModeOutputValues eModeOutputValues)
set the mode-output-value
Definition: NeuralNet.h:690
std::shared_ptr< std::function< double(double)> > m_activationFunction
stores the activation function
Definition: NeuralNet.h:703
std::shared_ptr< std::function< double(double)> > activationFunction() const
fetch the activation function for this layer
Definition: NeuralNet.h:695
size_t m_numNodes
Definition: NeuralNet.h:707
std::shared_ptr< std::function< double(double)> > m_inverseActivationFunction
stores the inverse activation function
Definition: NeuralNet.h:704
size_t numNodes() const
return the number of nodes of this layer
Definition: NeuralNet.h:692
ModeOutputValues m_eModeOutputValues
do the output values of this layer have to be transformed somehow (e.g. to probabilities) or returned...
Definition: NeuralNet.h:709
size_t numWeights(size_t numInputNodes) const
return the number of weights for this layer (fully connected)
Definition: NeuralNet.h:693
std::shared_ptr< std::function< double(double)> > inverseActivationFunction() const
fetch the inverse activation function for this layer
Definition: NeuralNet.h:696
EnumFunction m_activationFunctionType
Definition: NeuralNet.h:710
Layer(size_t numNodes, EnumFunction activationFunction, ModeOutputValues eModeOutputValues=ModeOutputValues::DIRECT)
c'tor for defining a Layer
Definition: NeuralNet.cxx:166
EnumFunction activationFunctionType() const
get the activation function type for this layer
Definition: NeuralNet.h:698
ModeOutputValues modeOutputValues() const
get the mode-output-value (direct, probabilities)
Definition: NeuralNet.h:689
double mean() const
Definition: NeuralNet.h:127
double var_corr() const
Definition: NeuralNet.h:137
void add(T value, double weight=1.0)
Definition: NeuralNet.h:94
double stdDev_corr() const
Definition: NeuralNet.h:145
double weights() const
Definition: NeuralNet.h:126
void add(ITERATOR itBegin, ITERATOR itEnd)
Definition: NeuralNet.h:117
double var() const
Definition: NeuralNet.h:128
double stdDev() const
Definition: NeuralNet.h:146
neural net
Definition: NeuralNet.h:1069
void setInputSize(size_t sizeInput)
set the input size of the DNN
Definition: NeuralNet.h:1099
std::vector< Layer > & layers()
returns the layers (structure)
Definition: NeuralNet.h:1253
void forwardBatch(const LayerContainer &_layers, LayerPatternContainer &layerPatternData, std::vector< double > &valuesMean, std::vector< double > &valuesStdDev, size_t trainFromLayer) const
Definition: NeuralNet.icc:1245
Net(const Net &other)
d'tor
Definition: NeuralNet.h:1092
bool * fExitFromTraining
Definition: NeuralNet.h:1284
std::vector< Layer > m_layers
layer-structure-data
Definition: NeuralNet.h:1279
UInt_t * fIPyMaxIter
Definition: NeuralNet.h:1285
void SetIpythonInteractive(IPythonInteractive *fI, bool *fE, UInt_t *M, UInt_t *C)
Definition: NeuralNet.h:1290
std::vector< double > compute(const std::vector< double > &input, const Weights &weights) const
compute the net with the given input and the given weights
Definition: NeuralNet.icc:1039
std::vector< double > container_type
Definition: NeuralNet.h:1072
container_type::iterator iterator_type
Definition: NeuralNet.h:1073
void preTrain(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
pre-training for future use
void fetchOutput(const LayerData &lastLayerData, OutputContainer &outputContainer) const
Definition: NeuralNet.icc:1300
size_t inputSize() const
input size of the DNN
Definition: NeuralNet.h:1105
std::pair< iterator_type, iterator_type > begin_end_type
Definition: NeuralNet.h:1074
ModeErrorFunction m_eErrorFunction
denotes the error function
Definition: NeuralNet.h:1276
void addLayer(Layer &&layer)
Definition: NeuralNet.h:1102
size_t numNodes(size_t trainingStartLayer=0) const
returns the number of nodes in this net
Definition: NeuralNet.cxx:559
double train(std::vector< double > &weights, std::vector< Pattern > &trainPattern, const std::vector< Pattern > &testPattern, Minimizer &minimizer, Settings &settings)
start the training
Definition: NeuralNet.icc:712
const std::vector< Layer > & layers() const
returns the layers (structure)
Definition: NeuralNet.h:1252
std::vector< std::vector< LayerData > > prepareLayerData(LayerContainer &layers, Batch &batch, const DropContainer &dropContainer, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t &totalNumWeights) const
Definition: NeuralNet.icc:1113
void setErrorFunction(ModeErrorFunction eErrorFunction)
which error function is to be used
Definition: NeuralNet.h:1103
void initializeWeights(WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
initialize the weights with the given strategy
Definition: NeuralNet.icc:1483
size_t outputSize() const
output size of the DNN
Definition: NeuralNet.h:1106
double errorFunction(LayerData &layerData, Container truth, ItWeight itWeight, ItWeight itWeightEnd, double patternWeight, double factorWeightDecay, EnumRegularization eRegularization) const
computes the error of the DNN
Definition: NeuralNet.icc:1592
double forward_backward(LayerContainer &layers, PassThrough &settingsAndBatch, ItWeight itWeightBegin, ItWeight itWeightEnd, ItGradient itGradientBegin, ItGradient itGradientEnd, size_t trainFromLayer, OutContainer &outputContainer, bool fetchOutput) const
main NN computation function
Definition: NeuralNet.icc:1418
void removeLayer()
remove one layer
Definition: NeuralNet.h:1255
size_t m_sizeOutput
outut size of this DNN
Definition: NeuralNet.h:1278
size_t m_sizeInput
input size of this DNN
Definition: NeuralNet.h:1277
double trainCycle(Minimizer &minimizer, std::vector< double > &weights, Iterator itPatternBegin, Iterator itPatternEnd, Settings &settings, DropContainer &dropContainer)
executes one training cycle
Definition: NeuralNet.icc:941
UInt_t * fIPyCurrentIter
Definition: NeuralNet.h:1285
double operator()(PassThrough &settingsAndBatch, const Weights &weights) const
execute computation of the DNN for one mini-batch (used by the minimizer); no computation of gradients
Definition: NeuralNet.icc:1072
void dropOutWeightFactor(WeightsType &weights, const DropProbabilities &drops, bool inverse=false)
set the drop-out configuration
Definition: NeuralNet.icc:652
void fillDropContainer(DropContainer &dropContainer, double dropFraction, size_t numNodes) const
prepare the drop-out container (select the nodes which are to be dropped out)
Definition: NeuralNet.cxx:575
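A sketch of filling a drop container by hand (the drop fraction is an example value; `net` is an assumed Net instance):

    // DropContainer is the std::vector<char> alias listed further below;
    // one flag per node marks the dropped-out nodes
    TMVA::DNN::DropContainer drop;
    net.fillDropContainer(drop, /*dropFraction=*/0.2, net.numNodes());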
void addLayer(Layer &layer)
add a layer (layout)
Definition: NeuralNet.h:1101
size_t numWeights(size_t trainingStartLayer=0) const
returns the number of weights in this net
Definition: NeuralNet.cxx:543
IPythonInteractive * fInteractive
Definition: NeuralNet.h:1283
std::tuple< double, double > computeError(const Settings &settings, std::vector< LayerData > &lastLayerData, Batch &batch, ItWeight itWeightBegin, ItWeight itWeightEnd) const
Definition: NeuralNet.icc:1330
void setOutputSize(size_t sizeOutput)
set the output size of the DNN
Definition: NeuralNet.h:1100
void forwardPattern(const LayerContainer &_layers, std::vector< LayerData > &layerData) const
Definition: NeuralNet.icc:1225
void backPropagate(std::vector< std::vector< LayerData > > &layerPatternData, const Settings &settings, size_t trainFromLayer, size_t totalNumWeights) const
Definition: NeuralNet.icc:1367
Settings for the training of the neural net.
Definition: NeuralNet.h:737
size_t m_batchSize
mini-batch size
Definition: NeuralNet.h:845
void setDropOut(Iterator begin, Iterator end, size_t _dropRepetitions)
set the drop-out configuration (layer-wise)
Definition: NeuralNet.h:766
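A sketch of a layer-wise drop-out configuration (the fractions and the repetition count are example values; `settings` is a Settings instance):

    // one drop-out fraction per layer, consumed in layer order
    std::vector<double> dropFractions = {0.2, 0.1, 0.0};
    settings.setDropOut(dropFractions.begin(), dropFractions.end(),
                        /*_dropRepetitions=*/7);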
void create(std::string histoName, int bins, double min, double max, int bins2, double min2, double max2)
for monitoring
Definition: NeuralNet.h:827
bool useMultithreading() const
is multithreading turned on?
Definition: NeuralNet.h:822
EnumRegularization regularization() const
which regularization of the DNN (if any) is turned on
Definition: NeuralNet.h:820
size_t convergenceCount() const
returns the current convergence count
Definition: NeuralNet.h:834
double momentum() const
get the momentum (e.g. for SGD)
Definition: NeuralNet.h:779
Timer m_timer
timer for monitoring
Definition: NeuralNet.h:839
size_t testRepetitions() const
how often is the test data tested
Definition: NeuralNet.h:775
void clear(std::string histoName)
for monitoring
Definition: NeuralNet.h:831
virtual void endTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:812
MinimizerType fMinimizerType
Definition: NeuralNet.h:862
void addPoint(std::string histoName, double x, double y)
for monitoring
Definition: NeuralNet.h:829
void setMonitoring(std::shared_ptr< Monitoring > ptrMonitoring)
prepared for monitoring
Definition: NeuralNet.h:771
virtual void testIteration()
callback for monitoring and logging
Definition: NeuralNet.h:813
size_t m_convergenceSteps
number of steps without improvement to consider the DNN to have converged
Definition: NeuralNet.h:844
virtual bool hasConverged(double testError)
has this training converged already?
Definition: NeuralNet.cxx:488
MinimizerType minimizerType() const
which minimizer shall be used (e.g. SGD)
Definition: NeuralNet.h:781
std::vector< double > m_dropOut
Definition: NeuralNet.h:857
double m_minProgress
current limits for the progress bar
Definition: NeuralNet.h:840
virtual void cycle(double progress, TString text)
Definition: NeuralNet.h:806
Settings(TString name, size_t _convergenceSteps=15, size_t _batchSize=10, size_t _testRepetitions=7, double _factorWeightDecay=1e-5, TMVA::DNN::EnumRegularization _regularization=TMVA::DNN::EnumRegularization::NONE, MinimizerType _eMinimizerType=MinimizerType::fSteepest, double _learningRate=1e-5, double _momentum=0.3, int _repetitions=3, bool _multithreading=true)
c'tor
Definition: NeuralNet.cxx:232
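The same c'tor written out with every default made explicit (only the name and the regularization deviate from the defaults in this example):

    TMVA::DNN::Settings settings("trainingRun",                        // name
                                 15,                                   // _convergenceSteps
                                 10,                                   // _batchSize
                                 7,                                    // _testRepetitions
                                 1e-5,                                 // _factorWeightDecay
                                 TMVA::DNN::EnumRegularization::L2,    // _regularization
                                 TMVA::DNN::MinimizerType::fSteepest,  // _eMinimizerType
                                 1e-5,                                 // _learningRate
                                 0.3,                                  // _momentum
                                 3,                                    // _repetitions
                                 true);                                // _multithreading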
virtual void setProgressLimits(double minProgress=0, double maxProgress=100)
Definition: NeuralNet.h:797
double m_maxProgress
current limits for the progress bar
Definition: NeuralNet.h:841
virtual void endTrainCycle(double)
callback for monitoring and logging
Definition: NeuralNet.h:795
virtual void drawSample(const std::vector< double > &, const std::vector< double > &, const std::vector< double > &, double)
callback for monitoring and logging
Definition: NeuralNet.h:814
double learningRate() const
get the learning rate
Definition: NeuralNet.h:778
double m_dropRepetitions
Definition: NeuralNet.h:856
const std::vector< double > & dropFractions() const
Definition: NeuralNet.h:769
void addPoint(std::string histoName, double x)
for monitoring
Definition: NeuralNet.h:828
virtual ~Settings()
d'tor
Definition: NeuralNet.cxx:261
size_t m_convergenceCount
Definition: NeuralNet.h:864
EnumRegularization m_regularization
Definition: NeuralNet.h:854
int repetitions() const
how many steps are taken before the batch is changed
Definition: NeuralNet.h:780
virtual void testSample(double, double, double, double)
virtual function to be used for monitoring (callback)
Definition: NeuralNet.h:788
void plot(std::string histoName, std::string options, int pad, EColor color)
for monitoring
Definition: NeuralNet.h:830
virtual void startTrainCycle()
Definition: NeuralNet.h:789
size_t convergenceSteps() const
how many steps until training is deemed to have converged
Definition: NeuralNet.h:773
double m_factorWeightDecay
Definition: NeuralNet.h:847
double factorWeightDecay() const
get the weight-decay factor
Definition: NeuralNet.h:776
bool exists(std::string histoName)
for monitoring
Definition: NeuralNet.h:832
size_t maxConvergenceCount() const
returns the max convergence count so far
Definition: NeuralNet.h:835
void pads(int numPads)
preparation for monitoring
Definition: NeuralNet.h:825
size_t m_testRepetitions
Definition: NeuralNet.h:846
size_t batchSize() const
mini-batch size
Definition: NeuralNet.h:774
virtual void computeResult(const Net &, std::vector< double > &)
callback for monitoring and logging
Definition: NeuralNet.h:816
std::shared_ptr< Monitoring > fMonitoring
Definition: NeuralNet.h:872
size_t dropRepetitions() const
Definition: NeuralNet.h:768
void create(std::string histoName, int bins, double min, double max)
for monitoring
Definition: NeuralNet.h:826
size_t minError() const
returns the smallest error so far
Definition: NeuralNet.h:836
virtual void startTraining()
Definition: NeuralNet.h:802
size_t m_maxConvergenceCount
Definition: NeuralNet.h:865
virtual void startTestCycle()
callback for monitoring and logging
Definition: NeuralNet.h:811
Steepest Gradient Descent algorithm (SGD)
Definition: NeuralNet.h:335
double m_beta
internal parameter (momentum)
Definition: NeuralNet.h:373
std::vector< double > m_localGradients
local gradients for reuse in each thread
Definition: NeuralNet.h:377
std::vector< double > m_prevGradients
vector remembers the gradients of the previous step
Definition: NeuralNet.h:374
double m_alpha
internal parameter (learningRate)
Definition: NeuralNet.h:372
std::vector< double > m_localWeights
local weights for reuse in each thread
Definition: NeuralNet.h:376
double operator()(Function &fitnessFunction, Weights &weights, PassThrough &passThrough)
operator to call the steepest gradient descent algorithm
Definition: NeuralNet.icc:271
Steepest(double learningRate=1e-4, double momentum=0.5, size_t repetitions=10)
c'tor
Definition: NeuralNet.h:349
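A construction sketch with the hyper-parameters spelled out (the values mirror the defaults of the c'tor above):

    TMVA::DNN::Steepest minimizer(/*learningRate=*/1e-4,
                                  /*momentum=*/0.5,
                                  /*repetitions=*/10);
    // Net::train drives it through the call operator listed above, roughly:
    //   double error = minimizer(fitnessFunction, weights, passThrough);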
This class is needed by JsMVA; it is a helper class for tracking errors during training in Jupyter.
Definition: MethodBase.h:94
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
void DrawProgressBar(Int_t, const TString &comment="")
draws the progress bar in color or B&W
Definition: Timer.cxx:196
Basic string class.
Definition: TString.h:131
double sumOfSquares(ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
sum-of-squares error function
double uniformDouble(double minValue, double maxValue)
Definition: NeuralNet.cxx:43
void forward(const LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
apply the weights (and functions) in forward direction of the DNN
Definition: NeuralNet.icc:546
void applyFunctions(ItValue itValue, ItValue itValueEnd, ItFunction itFunction)
apply the activation functions to the node values
ModeOutputValues operator|(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:189
EnumRegularization
Definition: NeuralNet.h:174
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:412
void backward(LAYERDATA &prevLayerData, LAYERDATA &currLayerData)
backward application of the weights (back-propagation of the error)
Definition: NeuralNet.icc:572
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:498
ModeOutputValues operator&=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:205
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:214
ModeErrorFunction
error functions to be chosen from
Definition: NeuralNet.h:1046
double softMaxCrossEntropy(ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
soft-max cross-entropy error function (for mutually exclusive classes)
Definition: NeuralNet.icc:458
WeightInitializationStrategy
weight initialization strategies to be chosen from
Definition: NeuralNet.h:1057
ModeOutputValues operator|=(ModeOutputValues &lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:194
MinimizerType
list all the minimizer types
Definition: NeuralNet.h:322
@ fSteepest
SGD.
Definition: NeuralNet.h:323
double gaussDouble(double mean, double sigma)
Definition: NeuralNet.cxx:35
ModeOutputValues operator&(ModeOutputValues lhs, ModeOutputValues rhs)
Definition: NeuralNet.h:200
ModeOutputValues
Definition: NeuralNet.h:180
void applyWeights(ItSource itSourceBegin, ItSource itSourceEnd, ItWeight itWeight, ItTarget itTargetBegin, ItTarget itTargetEnd)
std::tuple< Settings &, Batch &, DropContainer & > pass_through_type
Definition: NeuralNet.h:1301
bool isFlagSet(T flag, T value)
Definition: NeuralNet.h:213
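A sketch of combining and testing output-mode flags with the operators declared above:

    using TMVA::DNN::ModeOutputValues;
    ModeOutputValues mode = ModeOutputValues::SIGMOID;
    mode |= ModeOutputValues::BATCHNORMALIZATION;            // operator|=
    bool sigmoidOn = TMVA::DNN::isFlagSet(ModeOutputValues::SIGMOID, mode); // true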
int randomInt(int maxValue)
Definition: NeuralNet.cxx:52
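Usage sketch for the three random helpers (the exact range convention of randomInt is not verified here):

    double g = TMVA::DNN::gaussDouble(0.0, 1.0);     // Gaussian, mean 0, sigma 1
    double u = TMVA::DNN::uniformDouble(-1.0, 1.0);  // uniform between min and max
    int    r = TMVA::DNN::randomInt(10);             // random int bounded by maxValue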
void update(ItSource itSource, ItSource itSourceEnd, ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd, ItTargetGradient itTargetGradientBegin, ItGradient itGradient)
update the gradients
Definition: NeuralNet.icc:183
std::vector< char > DropContainer
Definition: NeuralNet.h:220
void applyWeightsBackwards(ItSource itCurrBegin, ItSource itCurrEnd, ItWeight itWeight, ItPrev itPrevBegin, ItPrev itPrevEnd)