DenseLayer.h
// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : TDenseLayer                                                           *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Dense Layer Class                                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Vladimir Ilievski   <ilievski.vladimir@live.com>  - CERN, Switzerland     *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"

#include <iostream>

namespace TMVA {
namespace DNN {
/** \class TDenseLayer

Generic layer class.

This generic layer class represents a dense layer of a neural network with
a given width n and activation function f. Each layer computes its
pre-activations as \f$\mathbf{u} = \mathbf{W}\mathbf{x} + \boldsymbol{\theta}\f$,
to which the activation function f is then applied element-wise.

In addition to the weight and bias matrices, each layer allocates memory
for its activations and the corresponding first partial derivatives of
the activation function, as well as the gradients of the weights and biases.

The layer provides member functions for the forward and backward propagation
of activations through the layer.
*/
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:
   std::vector<Matrix_t> fDerivatives; ///< First derivatives of the activations of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
   Scalar_t fWeightDecay;  ///< The weight decay.

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute activation of the layer for the given input. The input
    * must be in 3D tensor form with the different matrices corresponding to
    * different events in the batch. Computes activations as well as
    * the first partial derivative of the activation function at those
    * activations. */
   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /*! Compute weight, bias and activation gradients. Uses the precomputed
    * first partial derivatives of the activation function computed during
    * forward propagation and modifies them. Must only be called directly
    * after the corresponding call to Forward(...). */
   void Backward(std::vector<Matrix_t> &gradients_backward, const std::vector<Matrix_t> &activations_backward,
                 std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);

   /*! Printing the layer info. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   const std::vector<Matrix_t> &GetDerivatives() const { return fDerivatives; }
   std::vector<Matrix_t> &GetDerivatives() { return fDerivatives; }

   Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
   const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};
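
// A minimal usage sketch (hypothetical, for illustration only: it assumes the
// CPU backend TCpu<double> from "TMVA/DNN/Architectures/Cpu.h"; all parameter
// values are made up):
//
//    TDenseLayer<TCpu<double>> layer(/*BatchSize=*/32, /*InputWidth=*/10, /*Width=*/5,
//                                    EInitialization::kGauss, /*DropoutProbability=*/1.0,
//                                    EActivationFunction::kRelu, ERegularization::kL2,
//                                    /*weightDecay=*/1e-4);
//
//    std::vector<TCpu<double>::Matrix_t> input;
//    input.emplace_back(32, 10);   // batchSize x inputWidth event matrix
//    layer.Forward(input);         // activations now in layer.GetOutputAt(0)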

//
//
// The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fDerivatives(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   fDerivatives.emplace_back(batchSize, width);
}
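
// Note on the VGeneralLayer(...) argument list above: the parameter meanings
// are assumed from the base-class constructor in TMVA/DNN/GeneralLayer.h. A
// dense layer is stored as a degenerate 3D layer: input depth/height = 1 and
// depth/height = 1, with one weight matrix of shape (width x inputWidth), one
// bias matrix of shape (width x 1), and one output matrix of shape
// (batchSize x width).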

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer->GetDropoutProbability()),
     fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives.emplace_back(layer->GetBatchSize(), layer->GetWidth());
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer.fDropoutProbability), fF(layer.fF),
     fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives.emplace_back(layer.fBatchSize, layer.fWidth);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // Nothing to do here.
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::Dropout(input[0], this->GetDropoutProbability());
   }
   Architecture_t::MultiplyTranspose(this->GetOutputAt(0), input[0], this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutputAt(0), this->GetBiasesAt(0));
   evaluateDerivative<Architecture_t>(this->GetDerivativesAt(0), this->GetActivationFunction(), this->GetOutputAt(0));
   evaluate<Architecture_t>(this->GetOutputAt(0), this->GetActivationFunction());
}
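
// In matrix form, the forward pass above computes, for an event matrix X of
// shape (batchSize x inputWidth) and weights W of shape (width x inputWidth),
//
//    U = X W^T + theta      (the bias row vector added to every event row),
//    output       = f(U),
//    fDerivatives = f'(U),
//
// where f'(U) is evaluated before f overwrites the output in place, so that
// Backward() does not need to re-evaluate it.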

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(std::vector<Matrix_t> &gradients_backward,
                                           const std::vector<Matrix_t> &activations_backward,
                                           std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> & /*inp2*/) -> void
{
   if (gradients_backward.size() == 0) {
      Matrix_t dummy(0, 0);
      Architecture_t::Backward(dummy, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   } else {
      Architecture_t::Backward(gradients_backward[0], this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   }

   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
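
// Writing delta for the elementwise product f'(U) . dL/d(output), the call
// above fills in the standard dense-layer gradients (a sketch of the
// Architecture_t::Backward contract, as inferred from the reference backends):
//
//    dL/dX     = delta W              -> gradients_backward[0]
//    dL/dW     = delta^T X            -> GetWeightGradientsAt(0)
//    dL/dtheta = column sums of delta -> GetBiasGradientsAt(0)
//
// followed by the L1/L2 regularization term added to dL/dW. For the first
// layer gradients_backward is empty, dL/dX is not needed, and a dummy matrix
// is passed instead.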

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Print() const -> void
{
   std::cout << " DENSE Layer: \t ";
   std::cout << " ( Input = " << this->GetWeightsAt(0).GetNcols();          // input size
   std::cout << " , Width = " << this->GetWeightsAt(0).GetNrows() << " ) "; // layer width
   if (this->GetOutput().size() > 0) {
      std::cout << "\tOutput = ( " << this->GetOutput().size() << " , " << this->GetOutput()[0].GetNrows() << " , "
                << this->GetOutput()[0].GetNcols() << " ) ";
   }
   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)] << std::endl;
}

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // write layer width, activation function, and the weight and bias matrices

   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction", TString::Itoa(activationFunction, 10));
   // write weights and bias matrix
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
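
// The resulting XML fragment looks roughly as follows (the exact matrix
// encoding depends on VGeneralLayer::WriteMatrixToXML; attribute values here
// are illustrative):
//
//    <DenseLayer Width="5" ActivationFunction="2">
//       <Weights rows="5" cols="10"> ... </Weights>
//       <Biases rows="5" cols="1"> ... </Biases>
//    </DenseLayer>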

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read layer weights and biases from XML
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif