Layer.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

//////////////////////////////////////////////////////////////////////
// Contains the Layer and SharedLayer classes that represent layers //
// in neural networks.                                              //
//////////////////////////////////////////////////////////////////////
#ifndef TMVA_DNN_LAYER
#define TMVA_DNN_LAYER

#include <iostream>

#include "TMatrix.h"
#include "Functions.h"

namespace TMVA
{
namespace DNN
{

//______________________________________________________________________________
//
// The Layer Class
//______________________________________________________________________________

/** \class TLayer

 Generic layer class.

 This generic layer class represents a layer of a neural network with
 a given width n and activation function f. The layer computes its
 pre-activations as \f$\mathbf{u} = \mathbf{W}\mathbf{x} +
 \boldsymbol{\theta}\f$ and obtains its output by applying the
 activation function \f$f\f$ to \f$\mathbf{u}\f$ element-wise.

 In addition to the weight and bias matrices, each layer allocates memory
 for its activations and the corresponding first partial derivatives of
 the activation function, as well as for the gradients of the weights and
 biases.

 The layer provides member functions for the forward propagation of
 activations through the given layer.
*/
template<typename Architecture_t>
class TLayer
{

public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:

   size_t fBatchSize;  ///< Batch size used for training and evaluation.
   size_t fInputWidth; ///< Number of neurons of the previous layer.
   size_t fWidth;      ///< Number of neurons of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   Matrix_t fWeights;             ///< The weights of this layer.
   Matrix_t fBiases;              ///< The bias values of this layer.
   Matrix_t fOutput;              ///< Activations of this layer.
   Matrix_t fDerivatives;         ///< First derivatives of the activation function for this layer.
   Matrix_t fWeightGradients;     ///< Gradients w.r.t. the weights of this layer.
   Matrix_t fBiasGradients;       ///< Gradients w.r.t. the bias values of this layer.
   Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EActivationFunction fF; ///< Activation function of the layer.

public:

   TLayer(size_t batchSize,
          size_t inputWidth,
          size_t width,
          EActivationFunction f,
          Scalar_t dropoutProbability);
   TLayer(const TLayer &);

   /*! Initialize the weights according to the given initialization
    *  method. */
   void Initialize(EInitialization m);
   /*! Compute the activation of the layer for the given input. The input
    *  must be in matrix form, with the different rows corresponding to
    *  the different events in the batch. Computes the activations as well
    *  as the first partial derivatives of the activation function at those
    *  activations. */
   void inline Forward(Matrix_t & input, bool applyDropout = false);
   /*! Compute weight, bias and activation gradients. Uses the first
    *  partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void inline Backward(Matrix_t & gradients_backward,
                        const Matrix_t & activations_backward,
                        ERegularization r,
                        Scalar_t weightDecay);

   void Print() const;

   size_t GetBatchSize() const {return fBatchSize;}
   size_t GetInputWidth() const {return fInputWidth;}
   size_t GetWidth() const {return fWidth;}
   Scalar_t GetDropoutProbability() const {return fDropoutProbability;}

   void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}

   EActivationFunction GetActivationFunction() const {return fF;}

   Matrix_t & GetOutput() {return fOutput;}
   const Matrix_t & GetOutput() const {return fOutput;}
   Matrix_t & GetWeights() {return fWeights;}
   const Matrix_t & GetWeights() const {return fWeights;}
   Matrix_t & GetBiases() {return fBiases;}
   const Matrix_t & GetBiases() const {return fBiases;}
   Matrix_t & GetActivationGradients() {return fActivationGradients;}
   const Matrix_t & GetActivationGradients() const {return fActivationGradients;}
   Matrix_t & GetBiasGradients() {return fBiasGradients;}
   const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
   Matrix_t & GetWeightGradients() {return fWeightGradients;}
   const Matrix_t & GetWeightGradients() const {return fWeightGradients;}

};
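
// Shape conventions used by TLayer (as can be read off the constructor
// below, spelled out here for orientation): activations are stored
// row-wise, one event per row, so fOutput and fDerivatives are
// (fBatchSize x fWidth) matrices, fWeights is (fWidth x fInputWidth), and
// fBiases is a (fWidth x 1) column vector that is added to every row of
// the output.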

//______________________________________________________________________________
//
// The Shared Layer Class
//______________________________________________________________________________

/** \class TSharedLayer

 Layer class with shared weight and bias matrices.

 Like the TLayer class, except that the weight and bias matrices are
 shared between different instances of the net, which can be used to
 implement multithreading 'Hogwild' style.
*/

template<typename Architecture_t>
class TSharedLayer
{

public:

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:

   size_t fBatchSize;  ///< Batch size used for training and evaluation.
   size_t fInputWidth; ///< Number of neurons of the previous layer.
   size_t fWidth;      ///< Number of neurons of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   Matrix_t & fWeights;           ///< Reference to the weight matrix of this layer.
   Matrix_t & fBiases;            ///< Reference to the bias vectors of this layer.
   Matrix_t fOutput;              ///< Activations of this layer.
   Matrix_t fDerivatives;         ///< First derivatives of the activation function for this layer.
   Matrix_t fWeightGradients;     ///< Gradients w.r.t. the weights of this layer.
   Matrix_t fBiasGradients;       ///< Gradients w.r.t. the bias values of this layer.
   Matrix_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EActivationFunction fF; ///< Activation function of the layer.

public:

   TSharedLayer(size_t BatchSize,
                TLayer<Architecture_t> & layer);
   TSharedLayer(const TSharedLayer & layer);

   /*! Compute the activation of the layer for the given input. The input
    *  must be in matrix form, with the different rows corresponding to
    *  the different events in the batch. Computes the activations as well
    *  as the first partial derivatives of the activation function at those
    *  activations. */
   void inline Forward(Matrix_t & input, bool applyDropout = false);
   /*! Compute weight, bias and activation gradients. Uses the first
    *  partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void inline Backward(Matrix_t & gradients_backward,
                        const Matrix_t & activations_backward,
                        ERegularization r,
                        Scalar_t weightDecay);

   void Print() const;

   size_t GetBatchSize() const {return fBatchSize;}
   size_t GetInputWidth() const {return fInputWidth;}
   size_t GetWidth() const {return fWidth;}
   Scalar_t GetDropoutProbability() const {return fDropoutProbability;}

   void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}

   EActivationFunction GetActivationFunction() const {return fF;}

   Matrix_t & GetOutput() {return fOutput;}
   const Matrix_t & GetOutput() const {return fOutput;}
   Matrix_t & GetWeights() const {return fWeights;}
   Matrix_t & GetBiases() {return fBiases;}
   const Matrix_t & GetBiases() const {return fBiases;}
   Matrix_t & GetActivationGradients() {return fActivationGradients;}
   const Matrix_t & GetActivationGradients() const {return fActivationGradients;}
   Matrix_t & GetBiasGradients() {return fBiasGradients;}
   const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
   Matrix_t & GetWeightGradients() {return fWeightGradients;}
   const Matrix_t & GetWeightGradients() const {return fWeightGradients;}

};
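
// A sketch of the intended 'Hogwild'-style use (illustrative only; the
// surrounding training loop is an assumption, not part of this header):
// each worker thread wraps a master TLayer in a TSharedLayer, so that all
// threads read and update the same weight and bias matrices while keeping
// private output and gradient buffers.
//
//    TLayer<Architecture_t> master(batchSize, nIn, nOut, f, 1.0);
//    // ... in each worker thread:
//    TSharedLayer<Architecture_t> worker(batchSize, master);
//    worker.Forward(input);
//    worker.Backward(gradientsBackward, input, r, weightDecay);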

//______________________________________________________________________________
//
// The Layer Class - Implementation
//______________________________________________________________________________

template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(size_t batchSize,
                               size_t inputWidth,
                               size_t width,
                               EActivationFunction f,
                               Scalar_t dropoutProbability)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(width),
     fDropoutProbability(dropoutProbability), fWeights(width, fInputWidth),
     fBiases(width, 1), fOutput(fBatchSize, width), fDerivatives(fBatchSize, width),
     fWeightGradients(width, fInputWidth), fBiasGradients(width, 1),
     fActivationGradients(fBatchSize, width), fF(f)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(const TLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
     fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
     fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
     fOutput(layer.fBatchSize, layer.fWidth),
     fDerivatives(layer.fBatchSize, layer.fWidth),
     fWeightGradients(layer.fWidth, layer.fInputWidth),
     fBiasGradients(layer.fWidth, 1),
     fActivationGradients(layer.fBatchSize, layer.fWidth),
     fF(layer.fF)
{
   Architecture_t::Copy(fWeights, layer.GetWeights());
   Architecture_t::Copy(fBiases, layer.GetBiases());
}

//______________________________________________________________________________
template<typename Architecture_t>
auto TLayer<Architecture_t>::Initialize(EInitialization m)
-> void
{
   initialize<Architecture_t>(fWeights, m);
   initialize<Architecture_t>(fBiases, EInitialization::kZero);
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TLayer<Architecture_t>::Forward(Matrix_t & input,
                                            bool applyDropout)
-> void
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::Dropout(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   evaluateDerivative<Architecture_t>(fDerivatives, fF, fOutput);
   evaluate<Architecture_t>(fOutput, fF);
}
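
// For orientation, the shapes involved in Forward (following the member
// definitions above): input is (fBatchSize x fInputWidth), so
// MultiplyTranspose yields fOutput = input * fWeights^T of shape
// (fBatchSize x fWidth); AddRowWise adds the (fWidth x 1) bias vector to
// every row; fDerivatives = f'(u) is evaluated at the pre-activations u
// before evaluate overwrites fOutput with f(u).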

//______________________________________________________________________________
template<typename Architecture_t>
auto TLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
                                      const Matrix_t & activations_backward,
                                      ERegularization r,
                                      Scalar_t weightDecay)
-> void
{
   Architecture_t::Backward(gradients_backward,
                            fWeightGradients,
                            fBiasGradients,
                            fDerivatives,
                            fActivationGradients,
                            fWeights,
                            activations_backward);
   addRegularizationGradients<Architecture_t>(fWeightGradients,
                                              fWeights,
                                              weightDecay, r);
}
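
// In matrix form, the quantities computed by Architecture_t::Backward
// amount to standard backpropagation (a sketch inferred from the argument
// list above, writing dL/dU for fActivationGradients multiplied
// element-wise by fDerivatives):
//
//    fWeightGradients   = (dL/dU)^T * activations_backward
//    fBiasGradients     = column-wise sums of dL/dU over the batch
//    gradients_backward = (dL/dU) * fWeights
//
// addRegularizationGradients then adds weightDecay times the derivative of
// the chosen regularization term r(W) to fWeightGradients.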

//______________________________________________________________________________
template<typename Architecture_t>
void TLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}

//______________________________________________________________________________
//
// The Shared Layer Class - Implementation
//______________________________________________________________________________

//______________________________________________________________________________
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(size_t BatchSize,
                                           TLayer<Architecture_t> &layer)
   : fBatchSize(BatchSize),
     fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
     fDropoutProbability(layer.GetDropoutProbability()),
     fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
     fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
     fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
     fActivationGradients(fBatchSize, fWidth),
     fF(layer.GetActivationFunction())
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(const TSharedLayer &layer)
   : fBatchSize(layer.fBatchSize),
     fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
     fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
     fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
     fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
     fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
     fF(layer.fF)
{
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TSharedLayer<Architecture_t>::Forward(Matrix_t & input,
                                                  bool applyDropout)
-> void
{
   if (applyDropout && (fDropoutProbability != 1.0)) {
      Architecture_t::Dropout(input, fDropoutProbability);
   }
   Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
   Architecture_t::AddRowWise(fOutput, fBiases);
   evaluateDerivative<Architecture_t>(fDerivatives, fF, fOutput);
   evaluate<Architecture_t>(fOutput, fF);
}

//______________________________________________________________________________
template<typename Architecture_t>
auto inline TSharedLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
                                                   const Matrix_t & activations_backward,
                                                   ERegularization r,
                                                   Scalar_t weightDecay)
-> void
{
   Architecture_t::Backward(gradients_backward,
                            fWeightGradients,
                            fBiasGradients,
                            fDerivatives,
                            fActivationGradients,
                            fWeights,
                            activations_backward);
   addRegularizationGradients<Architecture_t>(fWeightGradients,
                                              fWeights,
                                              weightDecay, r);
}

//______________________________________________________________________________
template<typename Architecture_t>
void TSharedLayer<Architecture_t>::Print() const
{
   std::cout << "Width = " << fWeights.GetNrows();
   std::cout << ", Activation Function = ";
   std::cout << static_cast<int>(fF) << std::endl;
}

} // namespace DNN
} // namespace TMVA

#endif
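
A minimal usage sketch, assuming the reference architecture backend
TMVA::DNN::TReference; the include paths and the zero-filled matrices are
illustrative assumptions, not part of Layer.h:

#include "TMVA/DNN/Architectures/Reference.h"
#include "TMVA/DNN/Layer.h"

void LayerSketch()
{
   using Arch     = TMVA::DNN::TReference<double>;
   using Matrix_t = Arch::Matrix_t; // TMatrixT<double>
   using namespace TMVA::DNN;

   const size_t batchSize = 4, inputWidth = 3, width = 2;

   // One fully connected layer: 3 inputs -> 2 neurons, tanh activation,
   // dropout probability 1.0, i.e. every input is kept.
   TLayer<Arch> layer(batchSize, inputWidth, width,
                      EActivationFunction::kTanh, 1.0);
   layer.Initialize(EInitialization::kGauss);

   Matrix_t input(batchSize, inputWidth); // one event per row
   input.Zero();                          // fill with real data instead
   layer.Forward(input);                  // fOutput = f(input W^T + theta)

   // fActivationGradients would normally be filled by the downstream layer
   // (or the loss) before Backward is called.
   Matrix_t gradientsBackward(batchSize, inputWidth);
   layer.Backward(gradientsBackward, input, ERegularization::kNone, 0.0);
}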