RNNLayer.h
// @(#)root/tmva/tmva/dnn/rnn:$Id$
// Author: Saurav Shekhar 19/07/17

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : BasicRNNLayer                                                         *
 *                                                                                *
 * Description:                                                                   *
 *      NeuralNetwork                                                             *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland      *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      All rights reserved.                                                      *
 *      CERN, Switzerland                                                         *
 *                                                                                *
 * For the licensing terms see $ROOTSYS/LICENSE.                                  *
 * For the list of contributors see $ROOTSYS/README/CREDITS.                      *
 **********************************************************************************/

//#pragma once

//////////////////////////////////////////////////////////////////////
// <Description>                                                    //
//////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_RNN_LAYER
#define TMVA_DNN_RNN_LAYER

#include <cmath>
#include <iostream>
#include <vector>
#include <string>

#include "TMatrix.h"
#include "TMVA/DNN/Functions.h"

namespace TMVA
{
namespace DNN
{

namespace RNN {

//______________________________________________________________________________
//
// Basic RNN Layer
//______________________________________________________________________________

/** \class BasicRNNLayer
    Generic implementation of a vanilla recurrent layer: at each time step the
    hidden state is updated as state = act(W_input . input + W_state . state + bias).
*/
template<typename Architecture_t>
class TBasicRNNLayer : public VGeneralLayer<Architecture_t>
{

public:

   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

   using LayerDescriptor_t   = typename Architecture_t::RecurrentDescriptor_t;
   using WeightsDescriptor_t = typename Architecture_t::FilterDescriptor_t;
   using TensorDescriptor_t  = typename Architecture_t::TensorDescriptor_t;
   using HelperDescriptor_t  = typename Architecture_t::DropoutDescriptor_t;

   using RNNWorkspace_t   = typename Architecture_t::RNNWorkspace_t;
   using RNNDescriptors_t = typename Architecture_t::RNNDescriptors_t;

private:

   size_t fTimeSteps;            ///< Timesteps for RNN
   size_t fStateSize;            ///< Hidden state size of RNN
   bool fRememberState;          ///< Remember state in next pass
   bool fReturnSequence = false; ///< Return in output full sequence or just last element in time

   DNN::EActivationFunction fF;  ///< Activation function of the hidden state

   Matrix_t fState;              ///< Hidden State
   Matrix_t &fWeightsInput;      ///< Input weights, fWeights[0]
   Matrix_t &fWeightsState;      ///< Prev state weights, fWeights[1]
   Matrix_t &fBiases;            ///< Biases

   Tensor_t fDerivatives;           ///< First fDerivatives of the activations
   Matrix_t &fWeightInputGradients; ///< Gradients w.r.t. the input weights
   Matrix_t &fWeightStateGradients; ///< Gradients w.r.t. the recurring weights
   Matrix_t &fBiasGradients;        ///< Gradients w.r.t. the bias values

   Tensor_t fWeightsTensor;         ///< Tensor for all weights
   Tensor_t fWeightGradientsTensor; ///< Tensor for all weight gradients

   typename Architecture_t::ActivationDescriptor_t fActivationDesc;

   TDescriptors *fDescriptors = nullptr; ///< Keeps all the RNN descriptors
   TWorkspace *fWorkspace = nullptr;     ///< Workspace needed for GPU computation (cuDNN)

   Matrix_t fCell;                  ///< Empty matrix for RNN

   // tensors used internally for the forward and backward pass
   Tensor_t fX;  ///< cached input tensor as T x B x I
   Tensor_t fY;  ///< cached output tensor as T x B x S
   Tensor_t fDx; ///< cached gradient on the input (output of backward) as T x B x I
   Tensor_t fDy; ///< cached activation gradient (input of backward) as T x B x S

public:

   /** Constructor */
   TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize,
                  size_t timeSteps, bool rememberState = false, bool returnSequence = false,
                  DNN::EActivationFunction f = DNN::EActivationFunction::kTanh,
                  bool training = true, DNN::EInitialization fA = DNN::EInitialization::kZero);

   /** Copy Constructor */
   TBasicRNNLayer(const TBasicRNNLayer &);

   /*! Destructor. */
   virtual ~TBasicRNNLayer();

   /*! Initialize the weights according to the given initialization
    *  method. */
   virtual void Initialize();

   /*! Initialize the hidden state using the given initialization
    *  method. */
   void InitState(DNN::EInitialization m = DNN::EInitialization::kZero);

   /*! Compute and return the next state with given input
    *  matrix */
   void Forward(Tensor_t &input, bool isTraining = true);

   /*! Forward for a single cell (time unit) */
   void CellForward(const Matrix_t &input, Matrix_t &dF);

   /*! Backpropagates the error. Must only be called directly after the corresponding
    *  call to Forward(...). */
   void Backward(Tensor_t &gradients_backward,
                 const Tensor_t &activations_backward);

   /* Updates weights and biases, given the learning rate */
   void Update(const Scalar_t learningRate);

   /*! Backward for a single time unit, accompanying the corresponding
    *  call to Forward(...). */
   inline Matrix_t &CellBackward(Matrix_t &state_gradients_backward,
                                 const Matrix_t &precStateActivations,
                                 const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF);

   /** Prints the info about the layer */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   void InitTensors();
   // void InitializeDescriptors();
   // void ReleaseDescriptors();
   // void InitializeWorkspace();
   // void FreeWorkspace();

   /** Getters */
   size_t GetTimeSteps() const { return fTimeSteps; }
   size_t GetStateSize() const { return fStateSize; }
   size_t GetInputSize() const { return this->GetInputWidth(); }
   inline bool DoesRememberState() const { return fRememberState; }
   inline bool DoesReturnSequence() const { return fReturnSequence; }
   inline DNN::EActivationFunction GetActivationFunction() const { return fF; }
   Matrix_t &GetState() { return fState; } // RNN hidden state
   const Matrix_t &GetState() const { return fState; }
   Matrix_t &GetCell() { return fCell; } // this returns an empty matrix for RNN
   const Matrix_t &GetCell() const { return fCell; }

   Matrix_t &GetWeightsInput() { return fWeightsInput; }
   const Matrix_t &GetWeightsInput() const { return fWeightsInput; }
   Matrix_t &GetWeightsState() { return fWeightsState; }
   const Matrix_t &GetWeightsState() const { return fWeightsState; }
   Tensor_t &GetDerivatives() { return fDerivatives; }
   const Tensor_t &GetDerivatives() const { return fDerivatives; }
   // Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
   // const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }

   Matrix_t &GetBiasesState() { return fBiases; }
   const Matrix_t &GetBiasesState() const { return fBiases; }
   Matrix_t &GetBiasStateGradients() { return fBiasGradients; }
   const Matrix_t &GetBiasStateGradients() const { return fBiasGradients; }
   Matrix_t &GetWeightInputGradients() { return fWeightInputGradients; }
   const Matrix_t &GetWeightInputGradients() const { return fWeightInputGradients; }
   Matrix_t &GetWeightStateGradients() { return fWeightStateGradients; }
   const Matrix_t &GetWeightStateGradients() const { return fWeightStateGradients; }

   Tensor_t &GetWeightsTensor() { return fWeightsTensor; }
   const Tensor_t &GetWeightsTensor() const { return fWeightsTensor; }
   Tensor_t &GetWeightGradientsTensor() { return fWeightGradientsTensor; }
   const Tensor_t &GetWeightGradientsTensor() const { return fWeightGradientsTensor; }

   Tensor_t &GetX() { return fX; }
   Tensor_t &GetY() { return fY; }
   Tensor_t &GetDX() { return fDx; }
   Tensor_t &GetDY() { return fDy; }
};
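
// Usage sketch (illustrative only: the TCpu architecture choice, the sizes and
// the exact Tensor_t construction are assumptions, not prescribed by this file):
//
//   using Arch_t = TMVA::DNN::TCpu<Double_t>;
//   using RNN_t  = TMVA::DNN::RNN::TBasicRNNLayer<Arch_t>;
//
//   RNN_t rnn(/*batchSize*/ 4, /*stateSize*/ 8, /*inputSize*/ 16, /*timeSteps*/ 5);
//   rnn.Initialize();
//
//   RNN_t::Tensor_t input(4, 5, 16);          // B x T x D
//   rnn.Forward(input, /*isTraining*/ false); // fills the inherited output tensor
//   auto &out = rnn.GetOutput();              // B x 1 x stateSize for returnSequence = false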

//______________________________________________________________________________
//
// BasicRNNLayer Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
                                               bool rememberState, bool returnSequence, DNN::EActivationFunction f,
                                               bool /*training*/, DNN::EInitialization fA)
   // TODO inputDepth and outputDepth changed to batchSize??
   : VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, (returnSequence) ? timeSteps : 1,
                                   stateSize, 2, {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1},
                                   batchSize, (returnSequence) ? timeSteps : 1, stateSize, fA),
     fTimeSteps(timeSteps), fStateSize(stateSize), fRememberState(rememberState), fReturnSequence(returnSequence),
     fF(f), fState(batchSize, stateSize),
     fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)),
     fBiases(this->GetBiasesAt(0)), fDerivatives(timeSteps, batchSize, stateSize), // create tensor time x bs x S
     fWeightInputGradients(this->GetWeightGradientsAt(0)), fWeightStateGradients(this->GetWeightGradientsAt(1)),
     fBiasGradients(this->GetBiasGradientsAt(0)), fWeightsTensor({0}), fWeightGradientsTensor({0})
{
   InitTensors();
}
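
// Shape summary for the storage configured by the VGeneralLayer call above
// (a restatement of its arguments): fWeights[0] (input weights) is
// stateSize x inputSize, fWeights[1] (state weights) is stateSize x stateSize,
// fBiases[0] is stateSize x 1, and the output tensor is
// batchSize x ((returnSequence) ? timeSteps : 1) x stateSize.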

//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(const TBasicRNNLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fTimeSteps(layer.fTimeSteps), fStateSize(layer.fStateSize),
     fRememberState(layer.fRememberState), fReturnSequence(layer.fReturnSequence), fF(layer.GetActivationFunction()),
     fState(layer.GetBatchSize(), layer.GetStateSize()),
     fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)), fBiases(this->GetBiasesAt(0)),
     fDerivatives(layer.GetDerivatives().GetShape()), fWeightInputGradients(this->GetWeightGradientsAt(0)),
     fWeightStateGradients(this->GetWeightGradientsAt(1)), fBiasGradients(this->GetBiasGradientsAt(0)),
     fWeightsTensor({0}), fWeightGradientsTensor({0})
{

   Architecture_t::Copy(fDerivatives, layer.GetDerivatives());

   // Gradient matrices not copied
   Architecture_t::Copy(fState, layer.GetState());
   InitTensors();
}

template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::~TBasicRNNLayer()
{
   if (fDescriptors) {
      Architecture_t::ReleaseRNNDescriptors(fDescriptors);
      delete fDescriptors;
   }

   if (fWorkspace) {
      Architecture_t::FreeRNNWorkspace(fWorkspace);
      delete fWorkspace;
   }
}

//______________________________________________________________________________
template<typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::Initialize()
{
   // auto m = this->GetInitialization();
   // DNN::initialize<Architecture_t>(fWeightsInput, m);
   // DNN::initialize<Architecture_t>(fWeightsState, m);
   // DNN::initialize<Architecture_t>(fBiases, DNN::EInitialization::kZero);

   VGeneralLayer<Architecture_t>::Initialize();

   Architecture_t::InitializeRNNDescriptors(fDescriptors, this);
   Architecture_t::InitializeRNNWorkspace(fWorkspace, fDescriptors, this);
}

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::InitTensors()
{
   // the output tensor for cuDNN must be a tensor of B x T x S with the right layout
   Architecture_t::InitializeRNNTensors(this);
}
//______________________________________________________________________________
template <typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::InitState(DNN::EInitialization /*m*/) -> void
{
   DNN::initialize<Architecture_t>(this->GetState(), DNN::EInitialization::kZero);

   Architecture_t::InitializeActivationDescriptor(fActivationDesc, this->GetActivationFunction());
}

//______________________________________________________________________________
template<typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::Print() const
-> void
{
   std::cout << " RECURRENT Layer: \t ";
   std::cout << " (NInput = " << this->GetInputSize();  // input size
   std::cout << ", NState = " << this->GetStateSize();  // hidden state size
   std::cout << ", NTime  = " << this->GetTimeSteps() << " )";  // time size
   std::cout << "\tOutput = ( " << this->GetOutput().GetFirstSize() << " , " << this->GetOutput().GetHSize()
             << " , " << this->GetOutput().GetWSize() << " )\n";
}

template <typename Architecture_t>
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name = "matrix")
-> void
{
   std::cout << name << "\n";
   for (size_t i = 0; i < A.GetNrows(); ++i) {
      for (size_t j = 0; j < A.GetNcols(); ++j) {
         std::cout << A(i, j) << " ";
      }
      std::cout << "\n";
   }
   std::cout << "********\n";
}


//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::Forward(Tensor_t &input, bool isTraining) // B x T x D
{

   // for cuDNN
   if (Architecture_t::IsCudnn()) {

      Tensor_t &x = this->fX;
      Tensor_t &y = this->fY;

      Architecture_t::Rearrange(x, input);

      const auto &weights = this->GetWeightsAt(0);
      // Tensor_t cx({1}); // not used for normal RNN
      // Tensor_t cy({1}); // not used for normal RNN

      // hx is fState - tensors are of the right shape
      auto &hx = this->GetState();
      auto &cx = this->GetCell();
      // use same for hy and cy
      auto &hy = this->GetState();
      auto &cy = this->GetCell();

      auto rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);
      auto rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);

      Architecture_t::RNNForward(x, hx, cx, weights, y, hy, cy, rnnDesc, rnnWork, isTraining);

      if (fReturnSequence) {
         Architecture_t::Rearrange(this->GetOutput(), y); // swap B and T from y to Output
      }
      else {
         // tmp is a reference to y (full cuDNN output)
         Tensor_t tmp = (y.At(y.GetShape()[0] - 1)).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});
         Architecture_t::Copy(this->GetOutput(), tmp);
      }

      return;
   }

   // FORWARD for CPU architecture
   // D : input size
   // H : state size
   // T : time size
   // B : batch size

   Tensor_t arrInput(fTimeSteps, this->GetBatchSize(), this->GetInputWidth());
   //for (size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); // T x B x D
   Architecture_t::Rearrange(arrInput, input);
   Tensor_t arrOutput(fTimeSteps, this->GetBatchSize(), fStateSize);
   //for (size_t t = 0; t < fTimeSteps; ++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H

   if (!this->fRememberState) InitState(DNN::EInitialization::kZero);

   for (size_t t = 0; t < fTimeSteps; ++t) {
      Matrix_t arrInput_m = arrInput.At(t).GetMatrix();
      Matrix_t df_m = fDerivatives.At(t).GetMatrix();
      CellForward(arrInput_m, df_m);
      Matrix_t arrOutput_m = arrOutput.At(t).GetMatrix();
      Architecture_t::Copy(arrOutput_m, fState);
   }

   if (fReturnSequence)
      Architecture_t::Rearrange(this->GetOutput(), arrOutput); // B x T x D
   else {
      // get T[end]

      Tensor_t tmp = arrOutput.At(fTimeSteps - 1); // take last time step
      // shape of tmp is for CPU (columnwise) B x D, need to reshape it to B x D x 1
      // and transpose it to 1 x D x B (this is how output is expected in column-major format)
      tmp = tmp.Reshape({tmp.GetShape()[0], tmp.GetShape()[1], 1});
      assert(tmp.GetSize() == this->GetOutput().GetSize());
      assert(tmp.GetShape()[0] == this->GetOutput().GetShape()[2]); // B is last dim in output and first in tmp
      Architecture_t::Rearrange(this->GetOutput(), tmp);
      // keep array output
      fY = arrOutput;
   }
}
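
// Illustrative index mapping for the Rearrange calls used above (a sketch
// assuming element-wise access; the real implementation is architecture-specific
// and optimized):
//
//   // input : B x T x D   ->   arrInput : T x B x D
//   for (size_t b = 0; b < B; ++b)
//      for (size_t t = 0; t < T; ++t)
//         for (size_t d = 0; d < D; ++d)
//            arrInput(t, b, d) = input(b, t, d);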

//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellForward(const Matrix_t &input, Matrix_t &dF)
-> void
{
   // State = act(W_input . input + W_state . state + bias)
   const DNN::EActivationFunction fAF = this->GetActivationFunction();
   Matrix_t tmpState(fState.GetNrows(), fState.GetNcols());
   Architecture_t::MultiplyTranspose(tmpState, fState, fWeightsState);
   Architecture_t::MultiplyTranspose(fState, input, fWeightsInput);
   Architecture_t::ScaleAdd(fState, tmpState);
   Architecture_t::AddRowWise(fState, fBiases);
   Tensor_t inputActivFunc(dF);
   Tensor_t tState(fState);

   // DNN::evaluateDerivative<Architecture_t>(dFt, fAF, fState);
   // DNN::evaluate<Architecture_t>(tState, fAF);

   Architecture_t::Copy(inputActivFunc, tState);
   Architecture_t::ActivationFunctionForward(tState, fAF, fActivationDesc);
}
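
// Written out, CellForward implements the vanilla-RNN recurrence stated in its
// first comment (matrices are row-wise B x D / B x H, hence MultiplyTranspose):
//
//   state_t = f( input_t . W_input^T + state_{t-1} . W_state^T + bias )
//
// where f is the layer activation (kTanh by default). dF keeps a copy of the
// pre-activation values, which ActivationFunctionBackward consumes later.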

//____________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, // B x T x D
                                                     const Tensor_t &activations_backward) -> void // B x T x D
   // std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> &
   // /*inp2*/) -> void
{
   // BACKWARD for cuDNN
   if (Architecture_t::IsCudnn()) {

      Tensor_t &x = this->fX;
      Tensor_t &y = this->fY;
      Tensor_t &dx = this->fDx;
      Tensor_t &dy = this->fDy;

      // input size is stride[1] of input tensor that is B x T x inputSize
      assert(activations_backward.GetStrides()[1] == this->GetInputSize());

      Architecture_t::Rearrange(x, activations_backward);

      if (!fReturnSequence) {

         Architecture_t::InitializeZero(dy);

         //Tensor_t tmp1 = y.At(y.GetShape()[0] - 1).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});
         Tensor_t tmp2 = dy.At(dy.GetShape()[0] - 1).Reshape({dy.GetShape()[1], 1, dy.GetShape()[2]});

         //Architecture_t::Copy(tmp1, this->GetOutput());
         Architecture_t::Copy(tmp2, this->GetActivationGradients());
      }
      else {
         Architecture_t::Rearrange(y, this->GetOutput());
         Architecture_t::Rearrange(dy, this->GetActivationGradients());
      }

      // for cuDNN Matrix_t and Tensor_t are the same type
      const auto &weights = this->GetWeightsTensor();
      auto &weightGradients = this->GetWeightGradientsTensor();
      // note that cudnnRNNBackwardWeights accumulates the weight gradients,
      // so we need to initialize the tensor to zero every time
      Architecture_t::InitializeZero(weightGradients);

      // hx is fState
      auto &hx = this->GetState();
      auto cx = this->GetCell();
      // use same for hy and cy
      auto &dhy = hx;
      auto &dcy = cx;
      auto &dhx = hx;
      auto &dcx = cx;

      auto rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);
      auto rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);

      Architecture_t::RNNBackward(x, hx, cx, y, dy, dhy, dcy, weights, dx, dhx, dcx, weightGradients, rnnDesc, rnnWork);

      //Architecture_t::PrintTensor(this->GetOutput(), "output after bwd");

      if (gradients_backward.GetSize() != 0)
         Architecture_t::Rearrange(gradients_backward, dx);

      return;
   }

   // BACKWARD for CPU
   // activations_backward is the input
   // gradients_backward is the activationGradients of the previous layer, i.e. the input layer
   // currently gradients_backward is for the input (x) and not for the state
   // TODO use this to change initial state??


   bool dummy = false;
   if (gradients_backward.GetSize() == 0) {
      dummy = true;
   }
   Tensor_t arr_gradients_backward(fTimeSteps, this->GetBatchSize(), this->GetInputSize());
   //for (size_t t = 0; t < fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D

   if (!dummy) {
      // TODO gradients_backward will be written back on the matrix
      //Architecture_t::Rearrange(arr_gradients_backward, gradients_backward);
   }
   Tensor_t arr_activations_backward(fTimeSteps, this->GetBatchSize(), this->GetInputSize());
   //for (size_t t = 0; t < fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
   Architecture_t::Rearrange(arr_activations_backward, activations_backward);

   Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(state_gradients_backward, DNN::EInitialization::kZero);

   Matrix_t initState(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(initState, DNN::EInitialization::kZero);

   Tensor_t arr_output(fTimeSteps, this->GetBatchSize(), fStateSize);
   Tensor_t arr_actgradients(fTimeSteps, this->GetBatchSize(), fStateSize);

   if (fReturnSequence) {
      Architecture_t::Rearrange(arr_output, this->GetOutput());
      Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());
   } else {
      arr_output = fY;

      Architecture_t::InitializeZero(arr_actgradients);
      // need to reshape to pad a time dimension = 1 (note: column-major tensors here)
      Tensor_t tmp_grad = arr_actgradients.At(fTimeSteps - 1).Reshape({this->GetBatchSize(), fStateSize, 1});
      assert(tmp_grad.GetSize() == this->GetActivationGradients().GetSize());
      assert(tmp_grad.GetShape()[0] ==
             this->GetActivationGradients().GetShape()[2]); // B in tmp is [0] and [2] in input act. gradients

      Architecture_t::Rearrange(tmp_grad, this->GetActivationGradients());
   }

   // reinitialize weights and biases gradients to 0
   fWeightInputGradients.Zero();
   fWeightStateGradients.Zero();
   fBiasGradients.Zero();

   for (size_t t = fTimeSteps; t > 0; t--) {
      //const Matrix_t & currStateActivations = arr_output[t - 1];
      Matrix_t actgrad_m = arr_actgradients.At(t - 1).GetMatrix();
      Architecture_t::ScaleAdd(state_gradients_backward, actgrad_m);

      Matrix_t actbw_m = arr_activations_backward.At(t - 1).GetMatrix();
      Matrix_t gradbw_m = arr_gradients_backward.At(t - 1).GetMatrix();

      // compute derivatives of activations
      Tensor_t df = fDerivatives.At(t - 1);
      Tensor_t dy = Tensor_t(state_gradients_backward);
      //Tensor_t dy = arr_actgradients.At(t - 1);
      Tensor_t y = arr_output.At(t - 1);
      Architecture_t::ActivationFunctionBackward(df, y,
                                                 dy, df, // do in place (should work)
                                                 this->GetActivationFunction(), fActivationDesc);

      Matrix_t df_m = df.GetMatrix();

      // Architecture_t::PrintTensor(df, "dy before");
      if (t > 1) {
         Matrix_t precStateActivations = arr_output.At(t - 2).GetMatrix();
         CellBackward(state_gradients_backward, precStateActivations, actbw_m, gradbw_m, df_m);
      } else {
         const Matrix_t &precStateActivations = initState;
         CellBackward(state_gradients_backward, precStateActivations, actbw_m, gradbw_m, df_m);
      }
   }
   if (!dummy) {
      Architecture_t::Rearrange(gradients_backward, arr_gradients_backward);
   }
}

//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellBackward(Matrix_t &state_gradients_backward,
                                                         const Matrix_t &precStateActivations,
                                                         const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF)
-> Matrix_t &
{
   return Architecture_t::RecurrentLayerBackward(state_gradients_backward, fWeightInputGradients, fWeightStateGradients,
                                                 fBiasGradients, dF, precStateActivations, fWeightsInput,
                                                 fWeightsState, input, input_gradient);
}
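
// For reference, RecurrentLayerBackward accumulates the standard BPTT gradients
// for the recurrence h_t = f(x_t W_x^T + h_{t-1} W_h^T + b); with
// e_t = dL/dh_t (.) f'(net_t) (already formed in dF by ActivationFunctionBackward):
//
//   dL/dW_x += e_t^T x_t                 (fWeightInputGradients)
//   dL/dW_h += e_t^T h_{t-1}             (fWeightStateGradients)
//   dL/db   += column-wise sum of e_t    (fBiasGradients)
//   dL/dx_t  = e_t W_x                   (input_gradient)
//   dL/dh_{t-1} = e_t W_h                (returned state gradient)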

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "RNNLayer");

   // write all other info like stateSize, inputSize, timeSteps, rememberState
   gTools().xmlengine().NewAttr(layerxml, 0, "StateSize", gTools().StringFromInt(this->GetStateSize()));
   gTools().xmlengine().NewAttr(layerxml, 0, "InputSize", gTools().StringFromInt(this->GetInputSize()));
   gTools().xmlengine().NewAttr(layerxml, 0, "TimeSteps", gTools().StringFromInt(this->GetTimeSteps()));
   gTools().xmlengine().NewAttr(layerxml, 0, "RememberState", gTools().StringFromInt(this->DoesRememberState()));
   gTools().xmlengine().NewAttr(layerxml, 0, "ReturnSequence", gTools().StringFromInt(this->DoesReturnSequence()));

   // write weights and bias matrices
   this->WriteMatrixToXML(layerxml, "InputWeights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "StateWeights", this->GetWeightsAt(1));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
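
// The resulting XML fragment has roughly this shape (values illustrative; the
// matrix payload is whatever WriteMatrixToXML emits):
//
//   <RNNLayer StateSize="8" InputSize="16" TimeSteps="5"
//             RememberState="0" ReturnSequence="0">
//      <InputWeights ... /> <StateWeights ... /> <Biases ... />
//   </RNNLayer>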

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read weights and biases
   this->ReadMatrixXML(parent, "InputWeights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "StateWeights", this->GetWeightsAt(1));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace RNN
} // namespace DNN
} // namespace TMVA

#endif