Logo ROOT   6.14/05
Reference Guide
RNNLayer.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn/rnn:$Id$
2 // Author: Saurav Shekhar 19/07/17
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : BasicRNNLayer *
8  * *
9  * Description: *
10  * NeuralNetwork *
11  * *
12  * Authors (alphabetical): *
13  * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
14  * *
15  * Copyright (c) 2005-2015: *
16  * All rights reserved. *
17  * CERN, Switzerland *
18  * *
19  * For the licensing terms see $ROOTSYS/LICENSE. *
20  * For the list of contributors see $ROOTSYS/README/CREDITS. *
21  **********************************************************************************/
22 
23 //#pragma once
24 
25 //////////////////////////////////////////////////////////////////////
26 // <Description> //
27 //////////////////////////////////////////////////////////////////////
28 
29 #ifndef TMVA_DNN_RNN_LAYER
30 #define TMVA_DNN_RNN_LAYER
31 
32 #include <cmath>
33 #include <iostream>
34 #include <vector>
35 
36 #include "TMatrix.h"
37 #include "TMVA/DNN/Functions.h"
38 
39 namespace TMVA
40 {
41 namespace DNN
42 {
43 namespace RNN
44 {
45 
46 //______________________________________________________________________________
47 //
48 // Basic RNN Layer
49 //______________________________________________________________________________
50 
51 /** \class BasicRNNLayer
52  Generic implementation
53 */
54 template<typename Architecture_t>
55  class TBasicRNNLayer : public VGeneralLayer<Architecture_t>
56 {
57 
58 public:
59 
60  using Matrix_t = typename Architecture_t::Matrix_t;
61  using Scalar_t = typename Architecture_t::Scalar_t;
62  using Tensor_t = std::vector<Matrix_t>;
63 
64 private:
65 
66  size_t fTimeSteps; ///< Timesteps for RNN
67  size_t fStateSize; ///< Hidden state size of RNN
68  bool fRememberState; ///< Remember state in next pass
69 
70  DNN::EActivationFunction fF; ///< Activation function of the hidden state
71 
72  Matrix_t fState; ///< Hidden State
73  Matrix_t &fWeightsInput; ///< Input weights, fWeights[0]
74  Matrix_t &fWeightsState; ///< Prev state weights, fWeights[1]
75  Matrix_t &fBiases; ///< Biases
76 
77  std::vector<Matrix_t> fDerivatives; ///< First fDerivatives of the activations
78  Matrix_t &fWeightInputGradients; ///< Gradients w.r.t. the input weights
79  Matrix_t &fWeightStateGradients; ///< Gradients w.r.t. the recurring weights
80  Matrix_t &fBiasGradients; ///< Gradients w.r.t. the bias values
81 
82 public:
83 
84  /** Constructor */
85  TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize,
86  size_t timeSteps, bool rememberState = false,
88  bool training = true, DNN::EInitialization fA = DNN::EInitialization::kZero);
89 
90  /** Copy Constructor */
92 
93  /*! Initialize the weights according to the given initialization
94  ** method. */
95  //void Initialize(DNN::EInitialization m);
96 
97  /*! Initialize the state
98  ** method. */
100 
101  /*! Compute and return the next state with given input
102  * matrix */
103  void Forward(Tensor_t &input, bool isTraining = true);
104 
105  /*! Forward for a single cell (time unit) */
106  void CellForward(const Matrix_t &input, Matrix_t & dF);
107 
108  /*! Backpropagates the error. Must only be called directly at the corresponding
109  * call to Forward(...). */
110  void Backward(Tensor_t &gradients_backward,
111  const Tensor_t &activations_backward,
112  std::vector<Matrix_t> &inp1,
113  std::vector<Matrix_t> &inp2);
114 
115  /* Updates weights and biases, given the learning rate */
116  void Update(const Scalar_t learningRate);
117 
118  /*! Backward for a single time unit
119  * a the corresponding call to Forward(...). */
120  inline Matrix_t & CellBackward(Matrix_t & state_gradients_backward,
121  const Matrix_t & precStateActivations,
122  const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF);
123 
124  /** Prints the info about the layer */
125  void Print() const;
126 
127  /*! Writes the information and the weights about the layer in an XML node. */
128  virtual void AddWeightsXMLTo(void *parent);
129 
130  /*! Read the information and the weights about the layer from XML node. */
131  virtual void ReadWeightsFromXML(void *parent);
132 
133 
134  /** Getters */
135  size_t GetTimeSteps() const { return fTimeSteps; }
136  size_t GetStateSize() const { return fStateSize; }
137  size_t GetInputSize() const { return this->GetInputWidth(); }
138  inline bool IsRememberState() const {return fRememberState;}
140  Matrix_t & GetState() {return fState;}
141  const Matrix_t & GetState() const {return fState;}
143  const Matrix_t & GetWeightsInput() const {return fWeightsInput;}
145  const Matrix_t & GetWeightsState() const {return fWeightsState;}
146  std::vector<Matrix_t> & GetDerivatives() {return fDerivatives;}
147  const std::vector<Matrix_t> & GetDerivatives() const {return fDerivatives;}
148  Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
149  const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }
151  const Matrix_t & GetBiasesState() const {return fBiases;}
158 };
159 
160 //______________________________________________________________________________
161 //
162 // BasicRNNLayer Implementation
163 //______________________________________________________________________________
164 template <typename Architecture_t>
165 TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
166  bool rememberState, DNN::EActivationFunction f, bool /*training*/,
168  // TODO inputDepth and outputDepth changed to batchSize??
169  : VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, timeSteps, stateSize, 2,
170  {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1}, batchSize,
171  timeSteps, stateSize, fA),
172  fTimeSteps(timeSteps),
173  fStateSize(stateSize),
174  fRememberState(rememberState),
175  fF(f),
176  fState(batchSize, stateSize),
177  fWeightsInput(this->GetWeightsAt(0)),
178  fWeightsState(this->GetWeightsAt(1)),
179  fBiases(this->GetBiasesAt(0)),
183 {
184  for (size_t i = 0; i < timeSteps; ++i) {
185  fDerivatives.emplace_back(batchSize, stateSize);
186  }
187  // Nothing
188 }
189 
190 //______________________________________________________________________________
191 template <typename Architecture_t>
193  : VGeneralLayer<Architecture_t>(layer), fTimeSteps(layer.fTimeSteps), fStateSize(layer.fStateSize),
195  fState(layer.GetBatchSize(), layer.GetStateSize()), fWeightsInput(this->GetWeightsAt(0)),
196  fWeightsState(this->GetWeightsAt(1)), fBiases(this->GetBiasesAt(0)),
199 {
200  for (size_t i = 0; i < fTimeSteps; ++i) {
201  fDerivatives.emplace_back(layer.GetBatchSize(), layer.GetStateSize());
203  }
204  // Gradient matrices not copied
206 }
207 
208 //______________________________________________________________________________
209 //template<typename Architecture_t>
210 //auto TBasicRNNLayer<Architecture_t>::Initialize(DNN::EInitialization m)
211 //-> void
212 //{
213 // DNN::initialize<Architecture_t>(fWeightsInput, m);
214 // DNN::initialize<Architecture_t>(fWeightsState, m);
215 // DNN::initialize<Architecture_t>(fBiases, DNN::EInitialization::kZero);
216 //}
217 
218 //______________________________________________________________________________
219 template <typename Architecture_t>
221 {
222  DNN::initialize<Architecture_t>(this->GetState(), DNN::EInitialization::kZero);
223 }
224 
225 //______________________________________________________________________________
226 template<typename Architecture_t>
228 -> void
229 {
230  std::cout << " RECURRENT Layer: \t ";
231  std::cout << " (NInput = " << this->GetInputSize(); // input size
232  std::cout << ", NState = " << this->GetStateSize(); // hidden state size
233  std::cout << ", NTime = " << this->GetTimeSteps() << " )"; // time size
234  std::cout << "\tOutput = ( " << this->GetOutput().size() << " , " << this->GetOutput()[0].GetNrows() << " , " << this->GetOutput()[0].GetNcols() << " )\n";
235 }
236 
/// Debug helper: dump a matrix to std::cout.
///
/// Prints \p name on its own line, then one line per matrix row with every
/// element followed by a single space, and finally a line of eight asterisks.
///
/// \param A     matrix to print; must provide GetNrows(), GetNcols() and operator()(i, j)
/// \param name  label printed before the matrix
template <typename Architecture_t>
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string &name = "matrix")
-> void
{
   std::cout << name << "\n";
   // Use the matrix's own index type so the loop neither mixes signed and
   // unsigned operands nor narrows the index when calling operator().
   auto nRows = A.GetNrows();
   auto nCols = A.GetNcols();
   for (decltype(nRows) i = 0; i < nRows; ++i) {
      for (decltype(nCols) j = 0; j < nCols; ++j) {
         std::cout << A(i, j) << " ";
      }
      std::cout << "\n";
   }
   std::cout << "********\n";
}
250 
251 
252 //______________________________________________________________________________
253 template <typename Architecture_t>
254 auto inline TBasicRNNLayer<Architecture_t>::Forward(Tensor_t &input, bool /*isTraining*/) // B x T x D
255  -> void
256 {
257  // D : input size
258  // H : state size
259  // T : time size
260  // B : batch size
261 
262  Tensor_t arrInput;
263  for (size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); // T x B x D
264  Architecture_t::Rearrange(arrInput, input);
265  Tensor_t arrOutput;
266  for (size_t t = 0; t < fTimeSteps;++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H
267 
269  for (size_t t = 0; t < fTimeSteps; ++t) {
270  CellForward(arrInput[t], fDerivatives[t]);
271  Architecture_t::Copy(arrOutput[t], fState);
272  }
273  Architecture_t::Rearrange(this->GetOutput(), arrOutput); // B x T x D
274 }
275 
276 //______________________________________________________________________________
277 template <typename Architecture_t>
279 -> void
280 {
281  // State = act(W_input . input + W_state . state + bias)
283  Matrix_t tmpState(fState.GetNrows(), fState.GetNcols());
284  Architecture_t::MultiplyTranspose(tmpState, fState, fWeightsState);
285  Architecture_t::MultiplyTranspose(fState, input, fWeightsInput);
286  Architecture_t::ScaleAdd(fState, tmpState);
287  Architecture_t::AddRowWise(fState, fBiases);
288  DNN::evaluateDerivative<Architecture_t>(dF, fAF, fState);
289  DNN::evaluate<Architecture_t>(fState, fAF);
290 }
291 
292 //____________________________________________________________________________
293 template <typename Architecture_t>
294 auto inline TBasicRNNLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, // B x T x D
295  const Tensor_t &activations_backward, // B x T x D
296  std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> &
297  /*inp2*/) -> void
298 {
299  // activations backward is input
300  // gradients_backward is activationGradients of layer before it, which is input layer
301  // currently gradient_backward is for input(x) and not for state
302  // TODO use this to change initial state??
303 
304 
305  bool dummy = false;
306  if (gradients_backward.size() == 0 || gradients_backward[0].GetNrows() == 0 || gradients_backward[0].GetNcols() == 0) {
307  dummy = true;
308  }
309  Tensor_t arr_gradients_backward;
310  for (size_t t = 0; t < fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
311 
312  if (!dummy) {
313  // TODO gradients_backward will be written back on the matrix
314  //Architecture_t::Rearrange(arr_gradients_backward, gradients_backward);
315  }
316  Tensor_t arr_activations_backward;
317  for (size_t t = 0; t < fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
318  Architecture_t::Rearrange(arr_activations_backward, activations_backward);
319 
320  Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize); // B x H
321  DNN::initialize<Architecture_t>(state_gradients_backward, DNN::EInitialization::kZero);
322 
323  Matrix_t initState(this->GetBatchSize(), fStateSize); // B x H
324  DNN::initialize<Architecture_t>(initState, DNN::EInitialization::kZero);
325 
326  Tensor_t arr_output;
327  for (size_t t = 0; t < fTimeSteps; ++t) arr_output.emplace_back(this->GetBatchSize(), fStateSize);
328  Architecture_t::Rearrange(arr_output, this->GetOutput());
329 
330  Tensor_t arr_actgradients;
331  for (size_t t = 0; t < fTimeSteps; ++t) arr_actgradients.emplace_back(this->GetBatchSize(), fStateSize);
332  Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());
333 
334  // reinitialize weights and biases gradients to 0
335  fWeightInputGradients.Zero();
336  fWeightStateGradients.Zero();
337  fBiasGradients.Zero();
338 
339  for (size_t t = fTimeSteps; t > 0; t--) {
340  //const Matrix_t & currStateActivations = arr_output[t - 1];
341  Architecture_t::ScaleAdd(state_gradients_backward, arr_actgradients[t - 1]);
342  if (t > 1) {
343  const Matrix_t & precStateActivations = arr_output[t - 2];
344  CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
345  arr_gradients_backward[t - 1], fDerivatives[t - 1]);
346  } else {
347  const Matrix_t & precStateActivations = initState;
348  CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
349  arr_gradients_backward[t - 1], fDerivatives[t - 1]);
350  }
351  }
352  if (!dummy) {
353  Architecture_t::Rearrange(gradients_backward, arr_gradients_backward );
354  }
355  //Architecture_t::Rearrange(arr_activations_backward, activations_backward);
356 }
357 
358 //______________________________________________________________________________
359 template <typename Architecture_t>
360 auto inline TBasicRNNLayer<Architecture_t>::CellBackward(Matrix_t & state_gradients_backward,
361  const Matrix_t & precStateActivations,
362  const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF)
363 -> Matrix_t &
364 {
365  return Architecture_t::RecurrentLayerBackward(state_gradients_backward, fWeightInputGradients, fWeightStateGradients,
366  fBiasGradients, dF, precStateActivations, fWeightsInput,
367  fWeightsState, input, input_gradient);
368 }
369 
370 //______________________________________________________________________________
371 template <typename Architecture_t>
373 {
374  auto layerxml = gTools().xmlengine().NewChild(parent, 0, "RNNLayer");
375 
376  // write All other info like stateSize, inputSize, timeSteps,rememberState
377  gTools().xmlengine().NewAttr(layerxml, 0, "StateSize", gTools().StringFromInt(this->GetStateSize()));
378  gTools().xmlengine().NewAttr(layerxml, 0, "InputSize", gTools().StringFromInt(this->GetInputSize()));
379  gTools().xmlengine().NewAttr(layerxml, 0, "TimeSteps", gTools().StringFromInt(this->GetTimeSteps()));
380  gTools().xmlengine().NewAttr(layerxml, 0, "RememberState", gTools().StringFromInt(this->IsRememberState()));
381 
382  // write weights and bias matrices
383  this->WriteMatrixToXML(layerxml, "InputWeights", this -> GetWeightsAt(0));
384  this->WriteMatrixToXML(layerxml, "StateWeights", this -> GetWeightsAt(1));
385  this->WriteMatrixToXML(layerxml, "Biases", this -> GetBiasesAt(0));
386 
387 
388 }
389 
390 //______________________________________________________________________________
391 template <typename Architecture_t>
393 {
394  // Read weights and biases
395  this->ReadMatrixXML(parent,"InputWeights", this -> GetWeightsAt(0));
396  this->ReadMatrixXML(parent,"StateWeights", this -> GetWeightsAt(1));
397  this->ReadMatrixXML(parent,"Biases", this -> GetBiasesAt(0));
398 
399 }
400 
401 
402 } // namespace RNN
403 } // namespace DNN
404 } // namespace TMVA
405 
406 #endif
size_t GetInputWidth() const
Definition: GeneralLayer.h:143
Generic General Layer class.
Definition: GeneralLayer.h:45
TXMLEngine & xmlengine()
Definition: Tools.h:270
Matrix_t & fBiases
Biases.
Definition: RNNLayer.h:75
auto * m
Definition: textangle.C:8
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
Definition: GeneralLayer.h:502
const Matrix_t & GetBiasesAt(size_t i) const
Definition: GeneralLayer.h:158
Matrix_t & fBiasGradients
Gradients w.r.t. the bias values.
Definition: RNNLayer.h:80
std::vector< Matrix_t > & GetDerivatives()
Definition: RNNLayer.h:146
#define f(i)
Definition: RSha256.hxx:104
const Matrix_t & GetState() const
Definition: RNNLayer.h:141
Matrix_t & fWeightsState
Prev state weights, fWeights[1].
Definition: RNNLayer.h:74
static double A[]
std::vector< Matrix_t > fDerivatives
First fDerivatives of the activations.
Definition: RNNLayer.h:77
EInitialization
Definition: Functions.h:70
void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward, std::vector< Matrix_t > &inp1, std::vector< Matrix_t > &inp2)
Backpropagates the error.
Definition: RNNLayer.h:294
size_t GetBatchSize() const
Getters.
Definition: GeneralLayer.h:140
bool fRememberState
Remember state in next pass.
Definition: RNNLayer.h:68
const std::vector< Matrix_t > & GetActivationGradients() const
Definition: GeneralLayer.h:176
void Forward(Tensor_t &input, bool isTraining=true)
Compute and return the next state with given input matrix.
Definition: RNNLayer.h:254
Matrix_t & GetBiasStateGradients()
Definition: RNNLayer.h:152
const Matrix_t & GetWeightsState() const
Definition: RNNLayer.h:145
void Update(const Scalar_t learningRate)
void InitState(DNN::EInitialization m=DNN::EInitialization::kZero)
Initialize the weights according to the given initialization method.
Definition: RNNLayer.h:220
const Matrix_t & GetBiasGradientsAt(size_t i) const
Definition: GeneralLayer.h:170
const Matrix_t & GetBiasStateGradients() const
Definition: RNNLayer.h:153
const Matrix_t & GetWeightsInput() const
Definition: RNNLayer.h:143
Matrix_t & fWeightStateGradients
Gradients w.r.t. the recurring weights.
Definition: RNNLayer.h:79
void Print() const
Prints the info about the layer.
Definition: RNNLayer.h:227
Tools & gTools()
size_t GetTimeSteps() const
Getters.
Definition: RNNLayer.h:135
Matrix_t & fWeightInputGradients
Gradients w.r.t. the input weights.
Definition: RNNLayer.h:78
const Matrix_t & GetWeightsAt(size_t i) const
Definition: GeneralLayer.h:152
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
Definition: TXMLEngine.cxx:578
void Copy(void *source, void *dest)
TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, DNN::EActivationFunction f=DNN::EActivationFunction::kTanh, bool training=true, DNN::EInitialization fA=DNN::EInitialization::kZero)
Constructor.
Definition: RNNLayer.h:165
size_t GetInputSize() const
Definition: RNNLayer.h:137
void CellForward(const Matrix_t &input, Matrix_t &dF)
Forward for a single cell (time unit)
Definition: RNNLayer.h:278
static RooMathCoreReg dummy
const Matrix_t & GetBiasesState() const
Definition: RNNLayer.h:151
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name="matrix") -> void
Definition: RNNLayer.h:238
const Matrix_t & GetWeightInputGradients() const
Definition: RNNLayer.h:155
size_t GetStateSize() const
Definition: RNNLayer.h:136
Matrix_t & GetWeightStateGradients()
Definition: RNNLayer.h:156
Abstract ClassifierFactory template that handles arbitrary types.
const Matrix_t & GetWeightStateGradients() const
Definition: RNNLayer.h:157
const Matrix_t & GetWeightGradientsAt(size_t i) const
Definition: GeneralLayer.h:164
Matrix_t & GetDerivativesAt(size_t i)
Definition: RNNLayer.h:148
Matrix_t & CellBackward(Matrix_t &state_gradients_backward, const Matrix_t &precStateActivations, const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF)
Backward for a single time unit a the corresponding call to Forward(...).
Definition: RNNLayer.h:360
const std::vector< Matrix_t > & GetDerivatives() const
Definition: RNNLayer.h:147
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
Definition: TXMLEngine.cxx:707
Matrix_t & GetWeightInputGradients()
Definition: RNNLayer.h:154
std::vector< Matrix_t > Tensor_t
Definition: RNNLayer.h:62
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
Definition: GeneralLayer.h:479
DNN::EActivationFunction GetActivationFunction() const
Definition: RNNLayer.h:139
typename Architecture_t::Scalar_t Scalar_t
Definition: GeneralLayer.h:47
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:31
const std::vector< Matrix_t > & GetOutput() const
Definition: GeneralLayer.h:173
DNN::EActivationFunction fF
Activation function of the hidden state.
Definition: RNNLayer.h:70
Matrix_t & fWeightsInput
Input weights, fWeights[0].
Definition: RNNLayer.h:73
size_t fStateSize
Hidden state size of RNN.
Definition: RNNLayer.h:67
size_t fTimeSteps
Timesteps for RNN.
Definition: RNNLayer.h:66
const Matrix_t & GetDerivativesAt(size_t i) const
Definition: RNNLayer.h:149
char name[80]
Definition: TGX11.cxx:109
virtual void ReadWeightsFromXML(void *parent)
Read the information and the weights about the layer from XML node.
Definition: RNNLayer.h:392
Matrix_t fState
Hidden State.
Definition: RNNLayer.h:72
virtual void AddWeightsXMLTo(void *parent)
Writes the information and the weights about the layer in an XML node.
Definition: RNNLayer.h:372
typename Architecture_t::Matrix_t Matrix_t
Definition: RNNLayer.h:60