ROOT Reference Guide
Net.h
// @(#)root/tmva: $Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef TMVA_DNN_NET
#define TMVA_DNN_NET

#include <vector>
#include <iostream>

#include "Layer.h"

namespace TMVA {
namespace DNN {

/** \class TNet

    Generic neural network class.

    This generic neural network class represents a concrete neural
    network through a vector of layers and coordinates the forward
    and backward propagation through the net.

    The net takes as input a batch from the training data given in
    matrix form, with each row corresponding to a certain training
    event.

    On construction, the neural network allocates all the memory
    required for the training of the neural net and keeps it until
    its destruction.

    The Architecture type argument simply holds the
    architecture-specific data types, which are just the matrix type
    Matrix_t and the used scalar type Scalar_t.

    \tparam Architecture_t The Architecture type that holds the
    data types for a given architecture.
    \tparam Layer_t The type used for the layers. Can be either
    Layer<Architecture> or SharedWeightLayer<Architecture>.
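
    A minimal usage sketch, assuming the reference architecture
    TReference<double> from "Architectures/Reference.h" and suitably
    sized batch matrices X, Y and event weights W:

    \code
    using Net_t = TNet<TReference<double>>;
    Net_t net(32, 16, ELossFunction::kMeanSquaredError,
              ERegularization::kL2, 0.01);           // batch size 32, 16 features
    net.AddLayer(8, EActivationFunction::kTanh);     // hidden layer of width 8
    net.AddLayer(1, EActivationFunction::kIdentity); // output layer of width 1
    net.Initialize(EInitialization::kGauss);         // random initial weights
    net.Forward(X);                                  // compute layer activations
    net.Backward(X, Y, W);                           // compute weight and bias gradients
    \endcode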
*/
template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
   class TNet {

public:
   using Matrix_t        = typename Architecture_t::Matrix_t;
   using Scalar_t        = typename Architecture_t::Scalar_t;
   using LayerIterator_t = typename std::vector<Layer_t>::iterator;

private:
   size_t fBatchSize;  ///< Batch size for training and evaluation of the Network.
   size_t fInputWidth; ///< Number of features in a single input event.

   std::vector<Layer_t> fLayers; ///< Layers in the network.

   Matrix_t fDummy;        ///< Empty matrix for last step in back propagation.
   ELossFunction fJ;       ///< The loss function of the network.
   ERegularization fR;     ///< The regularization used for the network.
   Scalar_t fWeightDecay;  ///< The weight decay factor.

public:
   TNet();
   TNet(const TNet & other);
   template<typename OtherArchitecture_t>
   TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);
   /*! Construct a neural net for a given batch size with
    *  given loss function and regularization. */
   TNet(size_t batchSize,
        size_t inputWidth,
        ELossFunction J,
        ERegularization R = ERegularization::kNone,
        Scalar_t weightDecay = 0.0);
   /*! Create a clone that uses the same weight and biases matrices but
    *  potentially a different batch size. */
   TNet<Architecture_t, TSharedLayer<Architecture_t>> CreateClone(size_t batchSize);

   /*! Add a layer of the given size to the neural net. */
   void AddLayer(size_t width, EActivationFunction f,
                 Scalar_t dropoutProbability = 1.0);

   /*! Remove all layers from the network. */
   void Clear();

   /*! Add a layer which shares its weights with another TNet instance. */
   template <typename SharedLayer>
   void AddLayer(SharedLayer & layer);

   /*! Iterator to the first layer of the net. */
   LayerIterator_t LayersBegin() {return fLayers.begin();}

   /*! Iterator to the last layer of the net. */
   LayerIterator_t LayersEnd() {return fLayers.end();}

   /*! Initialize the weights in the net with the
    *  initialization method. */
   inline void Initialize(EInitialization m);

   /*! Initialize the gradients in the net to zero. Required if net is
    *  used to store velocities of momentum-based minimization techniques. */
   inline void InitializeGradients();

   /*! Forward a given input through the neural net. Computes
    *  all layer activations up to the output layer. */
   inline void Forward(Matrix_t& X, bool applyDropout = false);

   /*! Compute the weight gradients in the net from the given training
    *  samples X and training labels Y. */
   inline void Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights);

   /*! Evaluate the loss function of the net using the activations
    *  that are currently stored in the output layer. */
   inline Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights, bool includeRegularization = true) const;

   /*! Propagate the input batch X through the net and evaluate the
    *  error function for the resulting activations of the output
    *  layer. */
   inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights, bool applyDropout = false,
                        bool includeRegularization = true);

   /*! Compute the neural network prediction obtained from forwarding the
    *  batch X through the neural network and applying the output function
    *  f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);

   /*! Compute the neural network prediction obtained from applying the output
    *  function f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;

   Scalar_t GetNFlops();

   size_t GetDepth() const {return fLayers.size();}
   size_t GetBatchSize() const {return fBatchSize;}
   Layer_t & GetLayer(size_t i) {return fLayers[i];}
   const Layer_t & GetLayer(size_t i) const {return fLayers[i];}
   ELossFunction GetLossFunction() const {return fJ;}
   Matrix_t & GetOutput() {return fLayers.back().GetOutput();}
   size_t GetInputWidth() const {return fInputWidth;}
   size_t GetOutputWidth() const {return fLayers.back().GetWidth();}
   ERegularization GetRegularization() const {return fR;}
   Scalar_t GetWeightDecay() const {return fWeightDecay;}

   void SetBatchSize(size_t batchSize) {fBatchSize = batchSize;}
   void SetInputWidth(size_t inputWidth) {fInputWidth = inputWidth;}
   void SetRegularization(ERegularization R) {fR = R;}
   void SetLossFunction(ELossFunction J) {fJ = J;}
   void SetWeightDecay(Scalar_t weightDecay) {fWeightDecay = weightDecay;}
   void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);

   void Print();
};

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   TNet<Architecture_t, Layer_t>::TNet()
    : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
      fJ(ELossFunction::kMeanSquaredError), fR(ERegularization::kNone),
      fWeightDecay(0.0)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   TNet<Architecture_t, Layer_t>::TNet(const TNet & other)
    : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
      fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
      fWeightDecay(other.fWeightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename OtherArchitecture_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    const TNet<OtherArchitecture_t> & other)
    : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
      fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
      fWeightDecay(other.GetWeightDecay())
{
   fLayers.reserve(other.GetDepth());
   for (size_t i = 0; i < other.GetDepth(); i++) {
      AddLayer(other.GetLayer(i).GetWidth(),
               other.GetLayer(i).GetActivationFunction(),
               other.GetLayer(i).GetDropoutProbability());
      fLayers[i].GetWeights() = (TMatrixT<Scalar_t>) other.GetLayer(i).GetWeights();
      fLayers[i].GetBiases()  = (TMatrixT<Scalar_t>) other.GetLayer(i).GetBiases();
   }
}
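
// The conversion constructor above copies weights and biases across
// architectures by going through TMatrixT, so a net trained on one
// backend can seed another. A sketch, assuming the CUDA backend TCuda
// and an existing net `cpuNet` (names illustrative):
//
//    TNet<TCuda<float>> gpuNet(batchSize, cpuNet);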

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                       size_t inputWidth,
                                       ELossFunction J,
                                       ERegularization R,
                                       Scalar_t weightDecay)
    : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
      fJ(J), fR(R), fWeightDecay(weightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   auto TNet<Architecture_t, Layer_t>::CreateClone(size_t BatchSize)
    -> TNet<Architecture_t, TSharedLayer<Architecture_t>>
{
   TNet<Architecture_t, TSharedLayer<Architecture_t>> other(BatchSize, fInputWidth,
                                                            fJ, fR);
   for (auto &l : fLayers) {
      other.AddLayer(l);
   }
   return other;
}
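
// A sketch of the intended use (names illustrative): evaluate the
// current weights at a different batch size without copying them, e.g.
// through a single-event net for testing:
//
//    auto testNet = net.CreateClone(1);
//    testNet.Prediction(Y_hat, X_single, EOutputFunction::kSigmoid);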

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   void TNet<Architecture_t, Layer_t>::AddLayer(size_t width,
                                                EActivationFunction f,
                                                Scalar_t dropoutProbability)
{
   if (fLayers.size() == 0) {
      fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
   } else {
      size_t prevWidth = fLayers.back().GetWidth();
      fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
   }
}
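
// Successive calls chain the layer widths automatically. For example
// (illustrative), with input width 16
//
//    net.AddLayer(8, EActivationFunction::kRelu);
//    net.AddLayer(1, EActivationFunction::kIdentity);
//
// creates a 16 -> 8 layer followed by an 8 -> 1 layer.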

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   inline void TNet<Architecture_t, Layer_t>::Clear()
{
   fLayers.clear();
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   template<typename SharedLayer_t>
   inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
{
   fLayers.emplace_back(fBatchSize, layer);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   inline void TNet<Architecture_t, Layer_t>::Initialize(EInitialization m)
{
   for (auto &l : fLayers) {
      l.Initialize(m);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   inline void TNet<Architecture_t, Layer_t>::InitializeGradients()
{
   for (auto &l : fLayers) {
      initialize<Architecture_t>(l.GetWeightGradients(), EInitialization::kZero);
      initialize<Architecture_t>(l.GetBiasGradients(),   EInitialization::kZero);
   }
}
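
// Sketch of the use case mentioned in the declaration: a copy of the
// net can serve as velocity storage for momentum-based minimization
// (names illustrative):
//
//    TNet<Architecture_t> velocities(net); // same layout as net
//    velocities.InitializeGradients();     // all velocities start at zero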

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   inline void TNet<Architecture_t, Layer_t>::Forward(Matrix_t &input,
                                                      bool applyDropout)
{
   fLayers.front().Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
   }
}
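
// In formulas (a sketch): with activations A_0 = input and layer l
// holding weights W_l, bias vector theta_l and activation function f_l,
// each iteration computes one step of the recursion
//
//    A_l = f_l(A_{l-1} W_l^T + theta_l),
//
// one matrix row per event in the batch.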

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights)
{
   evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(), fJ, Y,
                                     fLayers.back().GetOutput(), weights);

   for (size_t i = fLayers.size()-1; i > 0; i--) {
      auto & activation_gradient_backward = fLayers[i-1].GetActivationGradients();
      auto & activations_backward         = fLayers[i-1].GetOutput();
      fLayers[i].Backward(activation_gradient_backward,
                          activations_backward, fR, fWeightDecay);
   }
   fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
}
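
// In formulas (a sketch): evaluateGradients seeds dL/dA_n in the last
// layer; each Layer_t::Backward call then propagates
//
//    dL/dA_{l-1} = (dL/dA_l . f_l'(...)) W_l
//
// one layer down (with "." the element-wise product) and accumulates
// the weight and bias gradients along the way. The first layer writes
// its input gradient into the unused fDummy matrix.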

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y, const Matrix_t &weights,
                                                bool includeRegularization) const -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput(), weights);
   includeRegularization &= (fR != ERegularization::kNone);
   if (includeRegularization) {
      for (auto &l : fLayers) {
         loss += fWeightDecay * regularization<Architecture_t>(l.GetWeights(), fR);
      }
   }
   return loss;
}
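
// In formulas (a sketch): for weight decay lambda = fWeightDecay the
// returned value is
//
//    loss = J(Y, Y_hat, weights) + lambda * sum_l r(W_l),
//
// where r is the L1 or L2 penalty selected by fR and the sum runs over
// all layers.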

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights,
                                                bool applyDropout, bool includeRegularization) -> Scalar_t
{
   Forward(X, applyDropout);
   return Loss(Y, weights, includeRegularization);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Yhat,
                                                         Matrix_t &X,
                                                         EOutputFunction f)
{
   Forward(X, false);
   evaluate<Architecture_t>(Yhat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                         EOutputFunction f) const
{
   evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   auto TNet<Architecture_t, Layer_t>::GetNFlops()
    -> Scalar_t
{
   Scalar_t flops = 0;

   Scalar_t nb  = (Scalar_t) fBatchSize;
   Scalar_t nlp = (Scalar_t) fInputWidth;

   for(size_t i = 0; i < fLayers.size(); i++) {
      Layer_t & layer = fLayers[i];
      Scalar_t nl = (Scalar_t) layer.GetWidth();

      // Forward propagation.
      flops += nb * nl * (2.0 * nlp - 1);   // Matrix mult.
      flops += nb * nl;                     // Add bias values.
      flops += 2 * nb * nl;                 // Apply activation function and compute derivative.

      // Backward propagation.
      flops += nb * nl;                     // Hadamard product.
      flops += nlp * nl * (2.0 * nb - 1.0); // Weight gradients.
      flops += nl * (nb - 1);               // Bias gradients.
      if (i > 0) {
         flops += nlp * nb * (2.0 * nl - 1.0); // Previous layer gradients.
      }
      nlp = nl;
   }
   return flops;
}
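
// Worked example with illustrative numbers: one layer with batch size
// nb = 100, input width nlp = 20 and width nl = 10 costs
//
//    forward:  100*10*39 + 100*10 + 2*100*10 = 42000 flops
//    backward: 100*10 + 20*10*199 + 10*99    = 41790 flops
//
// (the previous-layer gradient term is skipped for the first layer).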

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   void TNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
       const std::vector<Double_t> & probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i].SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i].SetDropoutProbability(1.0);
      }
   }
}
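
// Note that a layer's dropout probability is the probability of keeping
// a unit active; layers beyond the given vector are reset to 1.0 (no
// dropout). Example (illustrative): dropout only in the first layer,
// keeping 80% of its activations:
//
//    net.SetDropoutProbabilities({0.8});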

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
   void TNet<Architecture_t, Layer_t>::Print()
{
   std::cout << "DEEP NEURAL NETWORK:";
   std::cout << " Loss function = " << static_cast<char>(fJ);
   std::cout << ", Depth = " << fLayers.size() << std::endl;

   size_t i = 1;
   for (auto & l : fLayers) {
      std::cout << "DNN Layer " << i << ":" << std::endl;
      l.Print();
      i++;
   }
}

} // namespace DNN
} // namespace TMVA

#endif