ROOT 6.18/05 Reference Guide
RNNLayer.h
// @(#)root/tmva/tmva/dnn/rnn:$Id$
// Author: Saurav Shekhar 19/07/17

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : BasicRNNLayer                                                         *
 *                                                                                *
 * Description:                                                                   *
 *      NeuralNetwork                                                             *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Saurav Shekhar    <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland   *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      All rights reserved.                                                      *
 *      CERN, Switzerland                                                         *
 *                                                                                *
 * For the licensing terms see $ROOTSYS/LICENSE.                                  *
 * For the list of contributors see $ROOTSYS/README/CREDITS.                      *
 **********************************************************************************/

//#pragma once

//////////////////////////////////////////////////////////////////////
// Basic recurrent (RNN) layer for the TMVA deep neural networks   //
//////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_RNN_LAYER
#define TMVA_DNN_RNN_LAYER

#include <cmath>
#include <iostream>
#include <vector>

#include "TMatrix.h"
#include "TMVA/DNN/Functions.h"

namespace TMVA
{
namespace DNN
{
namespace RNN
{

//______________________________________________________________________________
//
// Basic RNN Layer
//______________________________________________________________________________

/** \class TBasicRNNLayer
    Generic implementation of a vanilla recurrent layer. At each time step
    the hidden state is updated as
        State = act(W_input . input + W_state . state + bias),
    where act is the layer's activation function (tanh by default).
*/
template<typename Architecture_t>
class TBasicRNNLayer : public VGeneralLayer<Architecture_t>
{

public:

   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Tensor_t = std::vector<Matrix_t>;

private:

   size_t fTimeSteps;            ///< Number of time steps the RNN is unrolled for
   size_t fStateSize;            ///< Size of the hidden state of the RNN
   bool   fRememberState;        ///< Whether to carry the hidden state over to the next forward pass

   DNN::EActivationFunction fF;  ///< Activation function of the hidden state

   Matrix_t fState;              ///< Hidden state
   Matrix_t &fWeightsInput;      ///< Input weights, fWeights[0]
   Matrix_t &fWeightsState;      ///< Previous-state weights, fWeights[1]
   Matrix_t &fBiases;            ///< Biases

   std::vector<Matrix_t> fDerivatives;  ///< First derivatives of the activations, one matrix per time step
   Matrix_t &fWeightInputGradients;     ///< Gradients w.r.t. the input weights
   Matrix_t &fWeightStateGradients;     ///< Gradients w.r.t. the recurrent state weights
   Matrix_t &fBiasGradients;            ///< Gradients w.r.t. the bias values

public:

   /** Constructor */
   TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize,
                  size_t timeSteps, bool rememberState = false,
                  DNN::EActivationFunction f = DNN::EActivationFunction::kTanh,
                  bool training = true, DNN::EInitialization fA = DNN::EInitialization::kZero);

   /** Copy constructor */
   TBasicRNNLayer(const TBasicRNNLayer &);

   /*! Initialize the weights according to the given initialization
    *  method. */
   //void Initialize(DNN::EInitialization m);

   /*! Initialize the hidden state according to the given initialization
    *  method. */
   void InitState(DNN::EInitialization m = DNN::EInitialization::kZero);

   /*! Compute and return the next state with the given input
    *  matrix. */
   void Forward(Tensor_t &input, bool isTraining = true);

   /*! Forward for a single cell (time unit). */
   void CellForward(const Matrix_t &input, Matrix_t &dF);

   /*! Backpropagates the error. Must only be called directly after the
    *  corresponding call to Forward(...). */
   void Backward(Tensor_t &gradients_backward,
                 const Tensor_t &activations_backward,
                 std::vector<Matrix_t> &inp1,
                 std::vector<Matrix_t> &inp2);

   /* Updates weights and biases, given the learning rate */
   void Update(const Scalar_t learningRate);

   /*! Backward for a single time unit, matching the corresponding call
    *  to CellForward(...). */
   inline Matrix_t & CellBackward(Matrix_t & state_gradients_backward,
                                  const Matrix_t & precStateActivations,
                                  const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF);

   /** Prints the info about the layer */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from an XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /** Getters */
   size_t GetTimeSteps() const { return fTimeSteps; }
   size_t GetStateSize() const { return fStateSize; }
   size_t GetInputSize() const { return this->GetInputWidth(); }
   inline bool IsRememberState() const { return fRememberState; }
   inline DNN::EActivationFunction GetActivationFunction() const { return fF; }
   Matrix_t       & GetState()              { return fState; }
   const Matrix_t & GetState()        const { return fState; }
   Matrix_t       & GetWeightsInput()       { return fWeightsInput; }
   const Matrix_t & GetWeightsInput() const { return fWeightsInput; }
   Matrix_t       & GetWeightsState()       { return fWeightsState; }
   const Matrix_t & GetWeightsState() const { return fWeightsState; }
   std::vector<Matrix_t>       & GetDerivatives()       { return fDerivatives; }
   const std::vector<Matrix_t> & GetDerivatives() const { return fDerivatives; }
   Matrix_t       & GetDerivativesAt(size_t i)       { return fDerivatives[i]; }
   const Matrix_t & GetDerivativesAt(size_t i) const { return fDerivatives[i]; }
   Matrix_t       & GetBiasesState()        { return fBiases; }
   const Matrix_t & GetBiasesState()  const { return fBiases; }
   Matrix_t       & GetBiasStateGradients()       { return fBiasGradients; }
   const Matrix_t & GetBiasStateGradients() const { return fBiasGradients; }
   Matrix_t       & GetWeightInputGradients()       { return fWeightInputGradients; }
   const Matrix_t & GetWeightInputGradients() const { return fWeightInputGradients; }
   Matrix_t       & GetWeightStateGradients()       { return fWeightStateGradients; }
   const Matrix_t & GetWeightStateGradients() const { return fWeightStateGradients; }
};
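
// Minimal usage sketch (illustrative, not part of the original header). It
// assumes the reference architecture TReference<double> from
// "TMVA/DNN/Architectures/Reference.h", whose Matrix_t is TMatrixT<double>,
// and that the backend implements the kernels used in this file (Rearrange,
// Copy, RecurrentLayerBackward, ...). Shapes follow the B x T x D convention
// documented in Forward() below.
//
//   using Arch = TMVA::DNN::TReference<double>;
//   const size_t B = 4, H = 8, D = 16, T = 5;
//
//   TMVA::DNN::RNN::TBasicRNNLayer<Arch> rnn(B, H, D, T);
//   rnn.Initialize();                    // weight init, inherited from VGeneralLayer
//
//   std::vector<Arch::Matrix_t> input;   // B matrices of shape T x D
//   for (size_t b = 0; b < B; ++b) input.emplace_back(T, D);
//   rnn.Forward(input);                  // rnn.GetOutput(): B matrices of T x H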

//______________________________________________________________________________
//
// BasicRNNLayer Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
                                               bool rememberState, DNN::EActivationFunction f, bool /*training*/,
                                               DNN::EInitialization fA)
   // TODO inputDepth and outputDepth changed to batchSize??
   : VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, timeSteps, stateSize, 2,
                                   {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1}, batchSize,
                                   timeSteps, stateSize, fA),
     fTimeSteps(timeSteps),
     fStateSize(stateSize),
     fRememberState(rememberState),
     fF(f),
     fState(batchSize, stateSize),
     fWeightsInput(this->GetWeightsAt(0)),
     fWeightsState(this->GetWeightsAt(1)),
     fBiases(this->GetBiasesAt(0)),
     fWeightInputGradients(this->GetWeightGradientsAt(0)),
     fWeightStateGradients(this->GetWeightGradientsAt(1)),
     fBiasGradients(this->GetBiasGradientsAt(0))
{
   for (size_t i = 0; i < timeSteps; ++i) {
      fDerivatives.emplace_back(batchSize, stateSize);
   }
}

//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(const TBasicRNNLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fTimeSteps(layer.fTimeSteps), fStateSize(layer.fStateSize),
     fRememberState(layer.fRememberState), fF(layer.GetActivationFunction()),
     fState(layer.GetBatchSize(), layer.GetStateSize()), fWeightsInput(this->GetWeightsAt(0)),
     fWeightsState(this->GetWeightsAt(1)), fBiases(this->GetBiasesAt(0)),
     fDerivatives(), fWeightInputGradients(this->GetWeightGradientsAt(0)),
     fWeightStateGradients(this->GetWeightGradientsAt(1)), fBiasGradients(this->GetBiasGradientsAt(0))
{
   for (size_t i = 0; i < fTimeSteps; ++i) {
      fDerivatives.emplace_back(layer.GetBatchSize(), layer.GetStateSize());
      Architecture_t::Copy(fDerivatives[i], layer.GetDerivativesAt(i));
   }
   // Gradient matrices not copied
   Architecture_t::Copy(fState, layer.GetState());
}

//______________________________________________________________________________
//template<typename Architecture_t>
//auto TBasicRNNLayer<Architecture_t>::Initialize(DNN::EInitialization m)
//-> void
//{
//   DNN::initialize<Architecture_t>(fWeightsInput, m);
//   DNN::initialize<Architecture_t>(fWeightsState, m);
//   DNN::initialize<Architecture_t>(fBiases, DNN::EInitialization::kZero);
//}

//______________________________________________________________________________
template <typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::InitState(DNN::EInitialization /*m*/)
-> void
{
   DNN::initialize<Architecture_t>(this->GetState(), DNN::EInitialization::kZero);
}

//______________________________________________________________________________
template<typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::Print() const
-> void
{
   std::cout << " RECURRENT Layer: \t ";
   std::cout << " (NInput = " << this->GetInputSize();  // input size
   std::cout << ", NState = " << this->GetStateSize();  // hidden state size
   std::cout << ", NTime = " << this->GetTimeSteps() << " )";  // time size
   std::cout << "\tOutput = ( " << this->GetOutput().size() << " , "
             << this->GetOutput()[0].GetNrows() << " , "
             << this->GetOutput()[0].GetNcols() << " )\n";
}

template <typename Architecture_t>
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name = "matrix")
-> void
{
   std::cout << name << "\n";
   for (size_t i = 0; i < A.GetNrows(); ++i) {
      for (size_t j = 0; j < A.GetNcols(); ++j) {
         std::cout << A(i, j) << " ";
      }
      std::cout << "\n";
   }
   std::cout << "********\n";
}


//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::Forward(Tensor_t &input, bool /*isTraining*/) // B x T x D
-> void
{
   // D : input size
   // H : state size
   // T : time size
   // B : batch size

   Tensor_t arrInput;
   for (size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); // T x B x D
   Architecture_t::Rearrange(arrInput, input);
   Tensor_t arrOutput;
   for (size_t t = 0; t < fTimeSteps; ++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H

   if (!this->fRememberState) InitState(DNN::EInitialization::kZero);
   for (size_t t = 0; t < fTimeSteps; ++t) {
      CellForward(arrInput[t], fDerivatives[t]);
      Architecture_t::Copy(arrOutput[t], fState);
   }
   Architecture_t::Rearrange(this->GetOutput(), arrOutput); // B x T x H
}
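
// Layout note (explanatory, not original source): Rearrange converts between
// the batch-major layout used by the surrounding network (B matrices of
// T x D) and the time-major layout used internally (T matrices of B x D), so
// each CellForward call consumes one B x D slice per time step while the
// hidden state is carried across steps in fState.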

//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellForward(const Matrix_t &input, Matrix_t &dF)
-> void
{
   // State = act(W_input . input + W_state . state + bias)
   const DNN::EActivationFunction fAF = this->GetActivationFunction();
   Matrix_t tmpState(fState.GetNrows(), fState.GetNcols());
   Architecture_t::MultiplyTranspose(tmpState, fState, fWeightsState);
   Architecture_t::MultiplyTranspose(fState, input, fWeightsInput);
   Architecture_t::ScaleAdd(fState, tmpState);
   Architecture_t::AddRowWise(fState, fBiases);
   DNN::evaluateDerivative<Architecture_t>(dF, fAF, fState);
   DNN::evaluate<Architecture_t>(fState, fAF);
}
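
// Shape bookkeeping for the cell update above (a reading aid, not original
// source): with batch size B, input size D and state size H,
//   input  : B x D      fWeightsInput : H x D
//   fState : B x H      fWeightsState : H x H
// MultiplyTranspose(out, X, W) computes out = X . W^T, so the pre-activation
// is  input . W_input^T + state . W_state^T  (B x H); AddRowWise adds the
// bias row to every sample, and dF stores the activation derivative at the
// pre-activation for later use in Backward.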

//____________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::Backward(Tensor_t &gradients_backward,         // B x T x D
                                                     const Tensor_t &activations_backward, // B x T x D
                                                     std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> &
                                                     /*inp2*/) -> void
{
   // activations_backward is the input of this layer
   // gradients_backward are the activation gradients of the previous (input) layer
   // currently gradients_backward is computed for the input (x) and not for the state
   // TODO use this to change initial state??

   bool dummy = false;
   if (gradients_backward.size() == 0 || gradients_backward[0].GetNrows() == 0 || gradients_backward[0].GetNcols() == 0) {
      dummy = true;
   }
   Tensor_t arr_gradients_backward;
   for (size_t t = 0; t < fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D

   if (!dummy) {
      // TODO gradients_backward will be written back on the matrix
      //Architecture_t::Rearrange(arr_gradients_backward, gradients_backward);
   }
   Tensor_t arr_activations_backward;
   for (size_t t = 0; t < fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
   Architecture_t::Rearrange(arr_activations_backward, activations_backward);

   Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(state_gradients_backward, DNN::EInitialization::kZero);

   Matrix_t initState(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(initState, DNN::EInitialization::kZero);

   Tensor_t arr_output;
   for (size_t t = 0; t < fTimeSteps; ++t) arr_output.emplace_back(this->GetBatchSize(), fStateSize);
   Architecture_t::Rearrange(arr_output, this->GetOutput());

   Tensor_t arr_actgradients;
   for (size_t t = 0; t < fTimeSteps; ++t) arr_actgradients.emplace_back(this->GetBatchSize(), fStateSize);
   Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());

   // reinitialize weight and bias gradients to 0
   fWeightInputGradients.Zero();
   fWeightStateGradients.Zero();
   fBiasGradients.Zero();

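   // Backpropagation through time (explanatory note, not original source):
   // walk the unrolled steps from t = fTimeSteps down to 1. At each step the
   // gradient flowing into the state is the sum of this layer's activation
   // gradient at step t (arr_actgradients[t - 1]) and the gradient carried
   // back from step t + 1 (state_gradients_backward, updated in place by
   // CellBackward). Weight and bias gradients accumulate over all steps.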
   for (size_t t = fTimeSteps; t > 0; t--) {
      //const Matrix_t & currStateActivations = arr_output[t - 1];
      Architecture_t::ScaleAdd(state_gradients_backward, arr_actgradients[t - 1]);
      if (t > 1) {
         const Matrix_t & precStateActivations = arr_output[t - 2];
         CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
                      arr_gradients_backward[t - 1], fDerivatives[t - 1]);
      } else {
         const Matrix_t & precStateActivations = initState;
         CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
                      arr_gradients_backward[t - 1], fDerivatives[t - 1]);
      }
   }
   if (!dummy) {
      Architecture_t::Rearrange(gradients_backward, arr_gradients_backward);
   }
   //Architecture_t::Rearrange(arr_activations_backward, activations_backward);
}

//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellBackward(Matrix_t & state_gradients_backward,
                                                         const Matrix_t & precStateActivations,
                                                         const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF)
-> Matrix_t &
{
   return Architecture_t::RecurrentLayerBackward(state_gradients_backward, fWeightInputGradients, fWeightStateGradients,
                                                 fBiasGradients, dF, precStateActivations, fWeightsInput,
                                                 fWeightsState, input, input_gradient);
}
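
// What RecurrentLayerBackward is expected to compute (a sketch under the
// conventions of CellForward; the actual kernel lives in the architecture
// backend): writing the pre-activation gradient as  dA = dF * dS_t
// (element-wise, with dS_t the incoming state gradient),
//   dW_input += dA^T . input                  (H x D)
//   dW_state += dA^T . precStateActivations   (H x H)
//   dBias    += column-wise sum of dA
//   input_gradient = dA . W_input             (B x D, returned to the caller)
//   dS_{t-1}       = dA . W_state             (B x H, overwrites state_gradients_backward)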

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "RNNLayer");

   // write all other info like stateSize, inputSize, timeSteps, rememberState
   gTools().xmlengine().NewAttr(layerxml, 0, "StateSize", gTools().StringFromInt(this->GetStateSize()));
   gTools().xmlengine().NewAttr(layerxml, 0, "InputSize", gTools().StringFromInt(this->GetInputSize()));
   gTools().xmlengine().NewAttr(layerxml, 0, "TimeSteps", gTools().StringFromInt(this->GetTimeSteps()));
   gTools().xmlengine().NewAttr(layerxml, 0, "RememberState", gTools().StringFromInt(this->IsRememberState()));

   // write weight and bias matrices
   this->WriteMatrixToXML(layerxml, "InputWeights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "StateWeights", this->GetWeightsAt(1));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
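
// Illustrative shape of the node produced above (values are made up; the
// exact matrix encoding is whatever WriteMatrixToXML emits):
//
//   <RNNLayer StateSize="8" InputSize="16" TimeSteps="5" RememberState="0">
//     <InputWeights ... />   <!-- 8 x 16 -->
//     <StateWeights ... />   <!-- 8 x 8  -->
//     <Biases ... />         <!-- 8 x 1  -->
//   </RNNLayer>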

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read weights and biases
   this->ReadMatrixXML(parent, "InputWeights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "StateWeights", this->GetWeightsAt(1));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}


} // namespace RNN
} // namespace DNN
} // namespace TMVA

#endif