DenseLayer.h
// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : TDenseLayer                                                           *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Dense Layer Class                                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland        *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"

#include <iostream>
#include <iomanip>

namespace TMVA {
namespace DNN {
/** \class TDenseLayer

Generic layer class.

This generic layer class represents a dense layer of a neural network with
a given width n and activation function f. The activation of the layer is
computed as \f$\mathbf{u} = f(\mathbf{W}\mathbf{x} + \boldsymbol{\theta})\f$,
where \f$\mathbf{W}\f$ is the weight matrix and \f$\boldsymbol{\theta}\f$ the
bias vector.

In addition to the weight and bias matrices, each layer allocates memory
for its activations and the corresponding first partial derivatives of
the activation function, as well as for the gradients of the weights and
biases.

The layer provides member functions for the forward propagation of
activations through the given layer.
*/
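// A minimal usage sketch (not part of the original header), assuming the
// reference backend TMVA::DNN::TReference<double> and the enum values defined
// in TMVA/DNN/Functions.h; the batch and width numbers are arbitrary:
//
//    using Arch_t = TMVA::DNN::TReference<double>;
//    TDenseLayer<Arch_t> layer(/*BatchSize=*/32, /*InputWidth=*/16, /*Width=*/8,
//                              EInitialization::kGauss, /*DropoutProbability=*/1.0,
//                              EActivationFunction::kRelu, ERegularization::kNone,
//                              /*weightDecay=*/0.0);
//    std::vector<Arch_t::Matrix_t> input;
//    input.emplace_back(32, 16);                   // one batch: 32 events x 16 features
//    layer.Forward(input, /*applyDropout=*/false); // activations land in layer.GetOutputAt(0)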
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:
   std::vector<Matrix_t> fDerivatives; ///< First derivatives of the activations of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
   Scalar_t fWeightDecay;  ///< The weight decay.

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute the activation of the layer for the given input. The input
    * must be in 3D tensor form, with the different matrices corresponding to
    * different events in the batch. Computes the activations as well as
    * the first partial derivative of the activation function at those
    * activations. */
   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /*! Compute weight, bias and activation gradients. Uses the precomputed
    * first partial derivatives of the activation function computed during
    * forward propagation and modifies them. Must only be called directly
    * after the corresponding call to Forward(...). */
   void Backward(std::vector<Matrix_t> &gradients_backward, const std::vector<Matrix_t> &activations_backward,
                 std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);

   /*! Prints the layer info. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Reads the information and the weights about the layer from an XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Set the dropout probability */
   virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   const std::vector<Matrix_t> &GetDerivatives() const { return fDerivatives; }
   std::vector<Matrix_t> &GetDerivatives() { return fDerivatives; }

   Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
   const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};

//
//
//  The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fDerivatives(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   fDerivatives.emplace_back(batchSize, width);
}
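// Reading the VGeneralLayer base call above (a sketch of the argument mapping,
// assumed from GeneralLayer.h rather than stated here): the input is treated
// as a 1 x 1 x inputWidth volume and the output as a 1 x 1 x width one; the
// layer owns a single width x inputWidth weight matrix, a single width x 1
// bias matrix, and one batchSize x width output matrix.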

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer->GetDropoutProbability()),
     fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives.emplace_back(layer->GetBatchSize(), layer->GetWidth());
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer.fDropoutProbability), fF(layer.fF),
     fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives.emplace_back(layer.fBatchSize, layer.fWidth);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // Nothing to do here.
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::Dropout(input[0], this->GetDropoutProbability());
   }
   Architecture_t::MultiplyTranspose(this->GetOutputAt(0), input[0], this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutputAt(0), this->GetBiasesAt(0));
   evaluateDerivative<Architecture_t>(this->GetDerivativesAt(0), this->GetActivationFunction(), this->GetOutputAt(0));
   evaluate<Architecture_t>(this->GetOutputAt(0), this->GetActivationFunction());
}
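// In matrix form, the calls above compute, for the single batch matrix X
// (a sketch; the MultiplyTranspose/AddRowWise semantics are assumed from
// their names and use here):
//    U = X * W^T                  (MultiplyTranspose)
//    U(i,j) += theta(j)           (AddRowWise: bias added to every row)
//    fDerivatives[0] = f'(U)      (evaluateDerivative, before U is overwritten)
//    output = f(U)                (evaluate, in place)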

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(std::vector<Matrix_t> &gradients_backward,
                                           const std::vector<Matrix_t> &activations_backward,
                                           std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> & /*inp2*/) -> void
{
   if (gradients_backward.size() == 0) {
      Matrix_t dummy(0, 0);
      Architecture_t::Backward(dummy, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   } else {
      Architecture_t::Backward(gradients_backward[0], this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   }

   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
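// What Architecture_t::Backward is expected to produce (a sketch based on the
// standard dense-layer backpropagation rules, with dY = the incoming activation
// gradients multiplied element-wise by f'(U), and X = activations_backward[0];
// this is an assumption, not stated in this file):
//    weight gradients  dW     = dY^T * X
//    bias gradients    dtheta = column-wise sums of dY
//    input gradients   dX     = dY * W   (written into gradients_backward[0];
//                                         skipped for the first layer, where
//                                         gradients_backward is empty)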

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();          // input size
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) "; // layer width
   if (this->GetOutput().size() > 0) {
      std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().size() << " ," << std::setw(6)
                << this->GetOutput()[0].GetNrows() << " ," << std::setw(6) << this->GetOutput()[0].GetNcols()
                << " ) ";
   }
   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)];
   if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}
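// Illustrative output of Print() for the 8-unit ReLU layer sketched earlier
// (spacing follows the setw() calls above; tabs shown as plain gaps):
//    DENSE Layer:   ( Input =    16 , Width =     8 )  Output = (  1 ,    32 ,     8 )   Activation Function = Relu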

//______________________________________________________________________________

template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // Write the layer width, the activation function, and the weight and bias matrices.

   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction", TString::Itoa(activationFunction, 10));

   // Write the weight and bias matrices.
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
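// Shape of the XML node written above (illustrative only; how WriteMatrixToXML
// encodes the matrix payload and its attributes is an assumption, as it is not
// shown in this file):
//    <DenseLayer Width="8" ActivationFunction="1">
//       <Weights rows="8" cols="16"> ... </Weights>
//       <Biases rows="8" cols="1"> ... </Biases>
//    </DenseLayer>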

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read the layer weights and biases from the XML node.
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif