DenseLayer.h
// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : TDenseLayer                                                           *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Dense Layer Class                                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland        *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/CNN/ContextHandles.h"

#include <iostream>
#include <iomanip>

namespace TMVA {
namespace DNN {
/** \class TDenseLayer

Generic layer class.

This generic layer class represents a dense layer of a neural network with
a given width n and activation function f. The layer first computes the
pre-activations \f$\mathbf{u} = \mathbf{W}\mathbf{x} + \boldsymbol{\theta}\f$
from the weight matrix \f$\mathbf{W}\f$ and the bias vector
\f$\boldsymbol{\theta}\f$, and then applies the activation function
elementwise, so the layer output is \f$\mathbf{y} = f(\mathbf{u})\f$.

In addition to the weight and bias matrices, each layer allocates memory
for its activations and for the input tensor of the activation function
(the pre-activations), as well as for the gradients of the weights and
biases.

The layer provides member functions for the forward and backward propagation
of activations through the layer.
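
A minimal construction sketch (illustrative only; the TReference backend and
all argument values are assumptions, not part of this header):

~~~{.cpp}
using namespace TMVA::DNN;
using Arch_t = TReference<double>;  // requires TMVA/DNN/Architectures/Reference.h

// batch size 32, input width 16, layer width 8, Gaussian initialization,
// no dropout (probability 1), ReLU activation, no regularization
TDenseLayer<Arch_t> layer(32, 16, 8, EInitialization::kGauss, 1.0,
                          EActivationFunction::kRelu, ERegularization::kNone, 0.0);
~~~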
*/
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:

   Tensor_t fInputActivation; ///< output of GEMM and input to the activation function
   Tensor_t fDerivatives;     ///< activation function gradient

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
   Scalar_t fWeightDecay;  ///< The weight decay.

   typename Architecture_t::ActivationDescriptor_t fActivationDesc; ///< the descriptor for the activation function

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute activation of the layer for the given input. The input
    * must be in 3D tensor form with the different matrices corresponding to
    * different events in the batch. Computes activations as well as
    * the first partial derivative of the activation function at those
    * activations. */
   void Forward(Tensor_t &input, bool applyDropout = false);

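   // Shape sketch (illustrative comment, not from the original header): the
   // constructor sizes the layer output as {1, batchSize, width}, so a matching
   // input tensor has shape {1, batchSize, inputWidth}, e.g.
   //
   //    Tensor_t input( {1, batchSize, inputWidth} );   // hypothetical shape ctor
   //    layer.Forward(input, /*applyDropout=*/false);
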
   /*! Compute weight, bias and activation gradients. Uses the precomputed
    * first partial derivatives of the activation function computed during
    * forward propagation and modifies them. Must only be called directly
    * after the corresponding call to Forward(...). */
   void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward);

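   // Call-order sketch (illustrative only; gradPrev is a placeholder name):
   //
   //    layer.Forward(input);             // caches the pre-activations
   //    // ... the next layer (or the loss) fills layer.GetActivationGradients()
   //    layer.Backward(gradPrev, input);  // gradPrev receives the input gradients;
   //                                      // weight/bias gradients are stored in the layer
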
   /*! Printing the layer info. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Set dropout probabilities */
   virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   /* return output of GEMM before computing the activation function */
   const Tensor_t &GetInputActivation() const { return fInputActivation; }
   Tensor_t &GetInputActivation() { return fInputActivation; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};

//
//
//  The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   // the shape should be {1, batchSize, width}; take it from the output tensor
   fInputActivation = Tensor_t( this->GetOutput().GetShape() );
   fDerivatives     = Tensor_t( this->GetOutput().GetShape() );

   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}
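
// Annotation (added here, assuming the VGeneralLayer constructor argument order):
// the base-class call above registers weights of shape (width x inputWidth),
// biases of shape (width x 1) and an output of shape (batchSize x width).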

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer) :
   VGeneralLayer<Architecture_t>(layer),
   fInputActivation( layer->GetInputActivation().GetShape() ),
   fDropoutProbability(layer->GetDropoutProbability()),
   fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives = Tensor_t( this->GetOutput().GetShape() );
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer) :
   VGeneralLayer<Architecture_t>(layer),
   fInputActivation( layer.GetInputActivation().GetShape() ),
   fDropoutProbability(layer.fDropoutProbability),
   fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives = Tensor_t( this->GetOutput().GetShape() );
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // release activation descriptor
   Architecture_t::ReleaseDescriptor(fActivationDesc);
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   // optionally drop inputs (a dropout probability of 1 means no dropout)
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::DropoutForward(input, static_cast<TDescriptors *>(nullptr),
                                     static_cast<TWorkspace *>(nullptr),
                                     this->GetDropoutProbability());
   }
   // compute the pre-activations u = W x + theta ...
   Architecture_t::MultiplyTranspose(this->GetOutput(), input, this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));

   // ... cache them for the backward pass ...
   Architecture_t::Copy(this->GetInputActivation(), this->GetOutput());

   // ... and apply the activation function in place
   Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) -> void
{
   // backpropagate through dropout if it was applied in the forward pass
   if (this->GetDropoutProbability() != 1.0) {
      Architecture_t::DropoutBackward(this->GetActivationGradients(),
                                      static_cast<TDescriptors *>(nullptr),
                                      static_cast<TWorkspace *>(nullptr));
   }

   // multiply the incoming activation gradients by f'(u) at the cached pre-activations
   Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(),
                                              this->GetActivationGradients(), this->GetInputActivation(),
                                              this->GetActivationFunction(), fActivationDesc);

   // compute weight, bias and input gradients
   Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                            fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
                            activations_backward);

   // add the regularization term to the weight gradients
   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();           // input size
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) "; // layer width

   std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize() << " ," << std::setw(6)
             << this->GetOutput().GetShape()[0] << " ," << std::setw(6) << this->GetOutput().GetShape()[1] << " ) ";

   std::vector<std::string> activationNames = { "Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss" };
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[ static_cast<int>(fF) ];
   if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // write layer width, activation function, and the weight and bias matrices

   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
                                TString::Itoa(activationFunction, 10));
   // write weights and bias matrix
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
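
// Sketch of the XML fragment written above (illustrative; the attribute values
// and the matrix payload produced by WriteMatrixToXML are assumed/omitted):
//
//    <DenseLayer Width="8" ActivationFunction="1">
//       <Weights ... />
//       <Biases ... />
//    </DenseLayer>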

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read layer weights and biases from XML
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif