// @(#)root/tmva/tmva/dnn/rnn:$Id$
// Author: Saurav Shekhar 19/07/17

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : BasicRNNLayer                                                         *
 *                                                                                *
 * Description:                                                                   *
 *       NeuralNetwork                                                            *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *       Saurav Shekhar    <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland  *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *       All rights reserved.                                                     *
 *       CERN, Switzerland                                                        *
 *                                                                                *
 * For the licensing terms see $ROOTSYS/LICENSE.                                  *
 * For the list of contributors see $ROOTSYS/README/CREDITS.                      *
 **********************************************************************************/

//#pragma once

//////////////////////////////////////////////////////////////////////
// <Description>                                                    //
//////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_RNN_LAYER
#define TMVA_DNN_RNN_LAYER

#include <cmath>
#include <iostream>

#include "TMatrix.h"
#include "TMVA/DNN/Functions.h"

namespace TMVA
{
namespace DNN
{

namespace RNN {

//______________________________________________________________________________
//
// Basic RNN Layer
//______________________________________________________________________________

/** \class BasicRNNLayer
    Generic implementation of a vanilla (Elman) recurrent layer.

    At each time step the hidden state is updated as
    state = act(W_input . input + W_state . state + bias),
    where act is the activation function of the layer (see CellForward).
*/
template<typename Architecture_t>
class TBasicRNNLayer : public VGeneralLayer<Architecture_t>
{

public:

   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

   using LayerDescriptor_t   = typename Architecture_t::RecurrentDescriptor_t;
   using WeightsDescriptor_t = typename Architecture_t::FilterDescriptor_t;
   using TensorDescriptor_t  = typename Architecture_t::TensorDescriptor_t;
   using HelperDescriptor_t  = typename Architecture_t::DropoutDescriptor_t;

   using RNNWorkspace_t   = typename Architecture_t::RNNWorkspace_t;
   using RNNDescriptors_t = typename Architecture_t::RNNDescriptors_t;

private:

   size_t fTimeSteps;              ///< Timesteps for RNN
   size_t fStateSize;              ///< Hidden state size of RNN
   bool   fRememberState;          ///< Remember state in next pass
   bool   fReturnSequence = false; ///< Return in output full sequence or just last element in time

   DNN::EActivationFunction fF;    ///< Activation function of the hidden state

   Matrix_t  fState;               ///< Hidden State
   Matrix_t &fWeightsInput;        ///< Input weights, fWeights[0]
   Matrix_t &fWeightsState;        ///< Prev state weights, fWeights[1]
   Matrix_t &fBiases;              ///< Biases

   Tensor_t  fDerivatives;          ///< First derivatives of the activations
   Matrix_t &fWeightInputGradients; ///< Gradients w.r.t. the input weights
   Matrix_t &fWeightStateGradients; ///< Gradients w.r.t. the recurring weights
   Matrix_t &fBiasGradients;        ///< Gradients w.r.t. the bias values

   Tensor_t fWeightsTensor;         ///< Tensor for all weights (used by the cuDNN backend)
   Tensor_t fWeightGradientsTensor; ///< Tensor for all weight gradients (used by the cuDNN backend)

   typename Architecture_t::ActivationDescriptor_t fActivationDesc;

   TDescriptors *fDescriptors = nullptr; ///< Keeps all the RNN descriptors
   TWorkspace   *fWorkspace   = nullptr; ///< Workspace needed for the GPU computation (cuDNN)

   Matrix_t fCell; ///< Empty matrix for RNN

   // tensors used internally for the forward and backward pass
   Tensor_t fX;  ///< cached input tensor as T x B x I
   Tensor_t fY;  ///< cached output tensor as T x B x S
   Tensor_t fDx; ///< cached gradient on the input (output of backward) as T x B x I
   Tensor_t fDy; ///< cached activation gradient (input of backward) as T x B x S

public:

   /** Constructor */
   TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize,
                  size_t timeSteps, bool rememberState = false, bool returnSequence = false,
                  DNN::EActivationFunction f = DNN::EActivationFunction::kTanh,
                  bool training = true, DNN::EInitialization fA = DNN::EInitialization::kZero);

   /** Copy Constructor */
   TBasicRNNLayer(const TBasicRNNLayer &);

   /*! Destructor. */
   virtual ~TBasicRNNLayer();

   /*! Initialize the weights according to the given initialization
    **  method. */
   virtual void Initialize();

   /*! Initialize the hidden state. */
   void InitState(DNN::EInitialization m = DNN::EInitialization::kZero);

   /*! Compute and return the next state with given input
    *  matrix */
   void Forward(Tensor_t &input, bool isTraining = true);

   /*! Forward for a single cell (time unit) */
   void CellForward(const Matrix_t &input, Matrix_t &dF);

   /*! Backpropagates the error. Must only be called directly after the corresponding
    *  call to Forward(...). */
   void Backward(Tensor_t &gradients_backward,
                 const Tensor_t &activations_backward);

   /*! Updates weights and biases, given the learning rate */
   void Update(const Scalar_t learningRate);

   /*! Backward for a single time unit, matching the corresponding
    *  call to Forward(...). */
   inline Matrix_t & CellBackward(Matrix_t & state_gradients_backward,
                                  const Matrix_t & precStateActivations,
                                  const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF);

   /** Prints the info about the layer */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   void InitTensors();
   // void InitializeDescriptors();
   // void ReleaseDescriptors();
   // void InitializeWorkspace();
   // void FreeWorkspace();

   /** Getters */
   size_t GetTimeSteps() const { return fTimeSteps; }
   size_t GetStateSize() const { return fStateSize; }
   size_t GetInputSize() const { return this->GetInputWidth(); }
   inline bool DoesRememberState()  const { return fRememberState; }
   inline bool DoesReturnSequence() const { return fReturnSequence; }
   inline DNN::EActivationFunction GetActivationFunction() const { return fF; }
   Matrix_t       & GetState()       { return fState; } // RNN hidden state
   const Matrix_t & GetState() const { return fState; }
   Matrix_t       & GetCell()        { return fCell; } // returns an empty matrix for a plain RNN
   const Matrix_t & GetCell()  const { return fCell; }

   Matrix_t       & GetWeightsInput()        { return fWeightsInput; }
   const Matrix_t & GetWeightsInput()  const { return fWeightsInput; }
   Matrix_t       & GetWeightsState()        { return fWeightsState; }
   const Matrix_t & GetWeightsState() const { return fWeightsState; }
   Tensor_t       & GetDerivatives()        { return fDerivatives; }
   const Tensor_t & GetDerivatives()  const { return fDerivatives; }
   // Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
   // const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }

   Matrix_t       & GetBiasesState()        { return fBiases; }
   const Matrix_t & GetBiasesState()  const { return fBiases; }
   Matrix_t       & GetBiasStateGradients()       { return fBiasGradients; }
   const Matrix_t & GetBiasStateGradients() const { return fBiasGradients; }
   Matrix_t       & GetWeightInputGradients()       { return fWeightInputGradients; }
   const Matrix_t & GetWeightInputGradients() const { return fWeightInputGradients; }
   Matrix_t       & GetWeightStateGradients()       { return fWeightStateGradients; }
   const Matrix_t & GetWeightStateGradients() const { return fWeightStateGradients; }

   Tensor_t       & GetWeightsTensor()       { return fWeightsTensor; }
   const Tensor_t & GetWeightsTensor() const { return fWeightsTensor; }
   Tensor_t       & GetWeightGradientsTensor()       { return fWeightGradientsTensor; }
   const Tensor_t & GetWeightGradientsTensor() const { return fWeightGradientsTensor; }

   Tensor_t &GetX()  { return fX; }
   Tensor_t &GetY()  { return fY; }
   Tensor_t &GetDX() { return fDx; }
   Tensor_t &GetDY() { return fDy; }
};
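
// A minimal usage sketch (added for illustration; not part of the original file).
// It assumes the CPU backend TMVA::DNN::TCpu<Double_t> from
// "TMVA/DNN/Architectures/Cpu.h"; any Architecture_t providing the interface
// used above should work the same way:
//
//   using Arch_t = TMVA::DNN::TCpu<Double_t>;
//   // batchSize = 16, stateSize = 12, inputSize = 8, timeSteps = 5
//   TBasicRNNLayer<Arch_t> rnn(16, 12, 8, 5, /*rememberState=*/false,
//                              /*returnSequence=*/false);
//   rnn.Initialize();
//   typename Arch_t::Tensor_t input(16, 5, 8); // B x T x D
//   rnn.Forward(input, /*isTraining=*/true);
//   auto &output = rnn.GetOutput();            // B x 1 x stateSize (last step only)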

//______________________________________________________________________________
//
// BasicRNNLayer Implementation
//______________________________________________________________________________
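// Note on the VGeneralLayer call below (an inferred reading, added for clarity):
// the input is registered as 1 x timeSteps x inputSize and the output as
// batchSize x (timeSteps or 1) x stateSize; two weight matrices are allocated,
// W_input (stateSize x inputSize) and W_state (stateSize x stateSize), plus one
// bias vector of length stateSize.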
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
                                               bool rememberState, bool returnSequence, DNN::EActivationFunction f,
                                               bool /*training*/, DNN::EInitialization fA)
   // TODO inputDepth and outputDepth changed to batchSize??
   : VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, (returnSequence) ? timeSteps : 1,
                                   stateSize, 2, {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1},
                                   batchSize, (returnSequence) ? timeSteps : 1, stateSize, fA),
     fTimeSteps(timeSteps), fStateSize(stateSize), fRememberState(rememberState), fReturnSequence(returnSequence),
     fF(f), fState(batchSize, stateSize),
     fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)),
     fBiases(this->GetBiasesAt(0)), fDerivatives(timeSteps, batchSize, stateSize), // create tensor time x bs x S
     fWeightInputGradients(this->GetWeightGradientsAt(0)), fWeightStateGradients(this->GetWeightGradientsAt(1)),
     fBiasGradients(this->GetBiasGradientsAt(0)), fWeightsTensor({0}), fWeightGradientsTensor({0})
{
   InitTensors();
}

//______________________________________________________________________________
template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(const TBasicRNNLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fTimeSteps(layer.fTimeSteps), fStateSize(layer.fStateSize),
     fRememberState(layer.fRememberState), fReturnSequence(layer.fReturnSequence), fF(layer.GetActivationFunction()),
     fState(layer.GetBatchSize(), layer.GetStateSize()),
     fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)), fBiases(this->GetBiasesAt(0)),
     fDerivatives(layer.GetDerivatives().GetShape()), fWeightInputGradients(this->GetWeightGradientsAt(0)),
     fWeightStateGradients(this->GetWeightGradientsAt(1)), fBiasGradients(this->GetBiasGradientsAt(0)),
     fWeightsTensor({0}), fWeightGradientsTensor({0})
{

   Architecture_t::Copy(fDerivatives, layer.GetDerivatives());

   // Gradient matrices not copied
   Architecture_t::Copy(fState, layer.GetState());
   InitTensors();
}

template <typename Architecture_t>
TBasicRNNLayer<Architecture_t>::~TBasicRNNLayer()
{
   if (fDescriptors) {
      Architecture_t::ReleaseRNNDescriptors(fDescriptors);
      delete fDescriptors;
   }

   if (fWorkspace) {
      Architecture_t::FreeRNNWorkspace(fWorkspace);
      delete fWorkspace;
   }
}

//______________________________________________________________________________
template<typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::Initialize()
{
   // auto m = this->GetInitialization();
   // DNN::initialize<Architecture_t>(fWeightsInput, m);
   // DNN::initialize<Architecture_t>(fWeightsState, m);
   // DNN::initialize<Architecture_t>(fBiases, DNN::EInitialization::kZero);

   VGeneralLayer<Architecture_t>::Initialize();

   Architecture_t::InitializeRNNDescriptors(fDescriptors, this);
   Architecture_t::InitializeRNNWorkspace(fWorkspace, fDescriptors, this);
}

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::InitTensors()
{
   // for cuDNN the output must be a tensor of B x T x S with the right layout
   Architecture_t::InitializeRNNTensors(this);
}
//______________________________________________________________________________
template <typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::InitState(DNN::EInitialization /*m*/) -> void
{
   DNN::initialize<Architecture_t>(this->GetState(), DNN::EInitialization::kZero);

   Architecture_t::InitializeActivationDescriptor(fActivationDesc, this->GetActivationFunction());
}

//______________________________________________________________________________
template<typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::Print() const
-> void
{
   std::cout << " RECURRENT Layer: \t ";
   std::cout << " (NInput = " << this->GetInputSize();  // input size
   std::cout << ", NState = " << this->GetStateSize();  // hidden state size
   std::cout << ", NTime = " << this->GetTimeSteps() << " )";  // time size
   std::cout << "\tOutput = ( " << this->GetOutput().GetFirstSize() << " , " << this->GetOutput().GetHSize()
             << " , " << this->GetOutput().GetWSize() << " )\n";
}

template <typename Architecture_t>
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name = "matrix")
-> void
{
   std::cout << name << "\n";
   for (size_t i = 0; i < A.GetNrows(); ++i) {
      for (size_t j = 0; j < A.GetNcols(); ++j) {
         std::cout << A(i, j) << " ";
      }
      std::cout << "\n";
   }
   std::cout << "********\n";
}

//______________________________________________________________________________
template <typename Architecture_t>
void TBasicRNNLayer<Architecture_t>::Forward(Tensor_t &input, bool isTraining) // B x T x D
{

   // for cuDNN
   if (Architecture_t::IsCudnn()) {

      Tensor_t &x = this->fX;
      Tensor_t &y = this->fY;

      Architecture_t::Rearrange(x, input);

      const auto &weights = this->GetWeightsAt(0);
      // Tensor_t cx({1}); // not used for normal RNN
      // Tensor_t cy({1}); // not used for normal RNN

      // hx is fState - tensors are of the right shape
      auto &hx = this->GetState();
      auto &cx = this->GetCell();
      // use same for hy and cy
      auto &hy = this->GetState();
      auto &cy = this->GetCell();

      auto rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);
      auto rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);

      Architecture_t::RNNForward(x, hx, cx, weights, y, hy, cy, rnnDesc, rnnWork, isTraining);

      if (fReturnSequence) {
         Architecture_t::Rearrange(this->GetOutput(), y); // swap B and T from y to Output
      }
      else {
         // tmp is a reference to y (full cudnn output)
         Tensor_t tmp = (y.At(y.GetShape()[0] - 1)).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});
         Architecture_t::Copy(this->GetOutput(), tmp);
      }

      return;
   }

   // FORWARD for CPU architecture
   // D : input size
   // H : state size
   // T : time size
   // B : batch size

   Tensor_t arrInput(fTimeSteps, this->GetBatchSize(), this->GetInputWidth());
   //for (size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); // T x B x D
   Architecture_t::Rearrange(arrInput, input);
   Tensor_t arrOutput(fTimeSteps, this->GetBatchSize(), fStateSize);
   //for (size_t t = 0; t < fTimeSteps; ++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H

   if (!this->fRememberState) InitState(DNN::EInitialization::kZero);

   for (size_t t = 0; t < fTimeSteps; ++t) {
      Matrix_t arrInput_m = arrInput.At(t).GetMatrix();
      Matrix_t df_m = fDerivatives.At(t).GetMatrix();
      CellForward(arrInput_m, df_m);
      Matrix_t arrOutput_m = arrOutput.At(t).GetMatrix();
      Architecture_t::Copy(arrOutput_m, fState);
   }

   if (fReturnSequence)
      Architecture_t::Rearrange(this->GetOutput(), arrOutput); // B x T x H
   else {
      // get the last time step T[end]
      Tensor_t tmp = arrOutput.At(fTimeSteps - 1); // take last time step
      // shape of tmp is for CPU (columnwise) B x D, need to reshape it to B x D x 1
      // and transpose it to 1 x D x B (this is how output is expected in column-major format)
      tmp = tmp.Reshape({tmp.GetShape()[0], tmp.GetShape()[1], 1});
      assert(tmp.GetSize() == this->GetOutput().GetSize());
      assert(tmp.GetShape()[0] == this->GetOutput().GetShape()[2]); // B is last dim in output and first in tmp
      Architecture_t::Rearrange(this->GetOutput(), tmp);
      // keep array output
      fY = arrOutput;
   }
}

//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellForward(const Matrix_t &input, Matrix_t &dF)
-> void
{
   // State = act(W_input . input + W_state . state + bias)
   const DNN::EActivationFunction fAF = this->GetActivationFunction();
   Matrix_t tmpState(fState.GetNrows(), fState.GetNcols());
   Architecture_t::MultiplyTranspose(tmpState, fState, fWeightsState);
   Architecture_t::MultiplyTranspose(fState, input, fWeightsInput);
   Architecture_t::ScaleAdd(fState, tmpState);
   Architecture_t::AddRowWise(fState, fBiases);
   Tensor_t inputActivFunc(dF);
   Tensor_t tState(fState);

   // DNN::evaluateDerivative<Architecture_t>(dFt, fAF, fState);
   // DNN::evaluate<Architecture_t>(tState, fAF);

   Architecture_t::Copy(inputActivFunc, tState);
   Architecture_t::ActivationFunctionForward(tState, fAF, fActivationDesc);

}
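
// Shape bookkeeping for CellForward above (an inferred reading added for
// clarity; B = batch size, D = input size, H = state size):
//   input         : B x D
//   fWeightsInput : H x D   -> MultiplyTranspose gives input . W_input^T : B x H
//   fState        : B x H
//   fWeightsState : H x H   -> MultiplyTranspose gives state . W_state^T : B x H
//   fBiases       : H x 1, added row-wise over the batch by AddRowWise.
// inputActivFunc views dF, so the Copy caches the pre-activation values in dF;
// Backward later feeds them to ActivationFunctionBackward to get derivatives.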

//____________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::Backward(Tensor_t &gradients_backward,        // B x T x D
                                                     const Tensor_t &activations_backward) -> void // B x T x D
                                                     // std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> &
                                                     // /*inp2*/) -> void
{
   // BACKWARD for CUDNN
   if (Architecture_t::IsCudnn()) {

      Tensor_t &x  = this->fX;
      Tensor_t &y  = this->fY;
      Tensor_t &dx = this->fDx;
      Tensor_t &dy = this->fDy;

      // input size is stride[1] of input tensor that is B x T x inputSize
      assert(activations_backward.GetStrides()[1] == this->GetInputSize());

      Architecture_t::Rearrange(x, activations_backward);

      if (!fReturnSequence) {

         //Architecture_t::InitializeZero(dy);
         Architecture_t::InitializeZero(dy);

         //Tensor_t tmp1 = y.At(y.GetShape()[0] - 1).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});
         Tensor_t tmp2 = dy.At(dy.GetShape()[0] - 1).Reshape({dy.GetShape()[1], 1, dy.GetShape()[2]});

         //Architecture_t::Copy(tmp1, this->GetOutput());
         Architecture_t::Copy(tmp2, this->GetActivationGradients());
      }
      else {
         Architecture_t::Rearrange(y, this->GetOutput());
         Architecture_t::Rearrange(dy, this->GetActivationGradients());
      }

      // for cudnn Matrix_t and Tensor_t are same type
      const auto &weights = this->GetWeightsTensor();
      auto &weightGradients = this->GetWeightGradientsTensor();
      // note that cudnnRNNBackwardWeights accumulates the weight gradients.
      // We need then to initialize the tensor to zero every time
      Architecture_t::InitializeZero(weightGradients);

      // hx is fState
      auto &hx = this->GetState();
      auto cx  = this->GetCell();
      // use same for hy and cy
      auto &dhy = hx;
      auto &dcy = cx;
      auto &dhx = hx;
      auto &dcx = cx;

      auto rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);
      auto rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);

      Architecture_t::RNNBackward(x, hx, cx, y, dy, dhy, dcy, weights, dx, dhx, dcx, weightGradients, rnnDesc, rnnWork);

      //Architecture_t::PrintTensor(this->GetOutput(), "output after bwd");

      if (gradients_backward.GetSize() != 0)
         Architecture_t::Rearrange(gradients_backward, dx);

      return;
   }

   // BACKWARD for CPU
   // activations_backward is the input to this layer
   // gradients_backward is the activationGradients of the layer before it (the input layer)
   // currently gradients_backward is computed for the input (x) and not for the state
   // TODO use this to change initial state??

   bool dummy = false;
   if (gradients_backward.GetSize() == 0) {
      dummy = true;
   }
   Tensor_t arr_gradients_backward(fTimeSteps, this->GetBatchSize(), this->GetInputSize());
   //for (size_t t = 0; t < fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D

   if (!dummy) {
      // TODO gradients_backward will be written back on the matrix
      //Architecture_t::Rearrange(arr_gradients_backward, gradients_backward);
   }
   Tensor_t arr_activations_backward(fTimeSteps, this->GetBatchSize(), this->GetInputSize());
   //for (size_t t = 0; t < fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
   Architecture_t::Rearrange(arr_activations_backward, activations_backward);

   Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(state_gradients_backward, DNN::EInitialization::kZero);

   Matrix_t initState(this->GetBatchSize(), fStateSize); // B x H
   DNN::initialize<Architecture_t>(initState, DNN::EInitialization::kZero);

   Tensor_t arr_output(fTimeSteps, this->GetBatchSize(), fStateSize);
   Tensor_t arr_actgradients(fTimeSteps, this->GetBatchSize(), fStateSize);

   if (fReturnSequence) {
      Architecture_t::Rearrange(arr_output, this->GetOutput());
      Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());
   } else {
      // reuse the output cached in the forward pass
      arr_output = fY;

      Architecture_t::InitializeZero(arr_actgradients);
      // need to reshape to pad a time dimension = 1 (note these are column-major tensors)
      Tensor_t tmp_grad = arr_actgradients.At(fTimeSteps - 1).Reshape({this->GetBatchSize(), fStateSize, 1});
      assert(tmp_grad.GetSize() == this->GetActivationGradients().GetSize());
      assert(tmp_grad.GetShape()[0] ==
             this->GetActivationGradients().GetShape()[2]); // B in tmp is [0] and [2] in input act. gradients

      Architecture_t::Rearrange(tmp_grad, this->GetActivationGradients());
   }

   // reinitialize weights and biases gradients to 0
   fWeightInputGradients.Zero();
   fWeightStateGradients.Zero();
   fBiasGradients.Zero();

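   // Backpropagation through time (a descriptive note added for clarity):
   // the loop below walks backwards over the time steps. At each step t it
   // (1) adds the gradient coming from the output at t to the running state
   // gradient, (2) converts it into a pre-activation gradient via
   // ActivationFunctionBackward, and (3) calls CellBackward, which accumulates
   // the weight/bias gradients and propagates the state gradient to step t-1.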
   for (size_t t = fTimeSteps; t > 0; t--) {
      //const Matrix_t & currStateActivations = arr_output[t - 1];
      Matrix_t actgrad_m = arr_actgradients.At(t - 1).GetMatrix();
      Architecture_t::ScaleAdd(state_gradients_backward, actgrad_m);

      Matrix_t actbw_m  = arr_activations_backward.At(t - 1).GetMatrix();
      Matrix_t gradbw_m = arr_gradients_backward.At(t - 1).GetMatrix();

      // compute derivatives of activations
      Tensor_t df = fDerivatives.At(t - 1);
      Tensor_t dy = Tensor_t(state_gradients_backward);
      //Tensor_t dy = arr_actgradients.At(t - 1);
      Tensor_t y = arr_output.At(t - 1);
      Architecture_t::ActivationFunctionBackward(df, y,
                                                 dy, df, // do in place (should work)
                                                 this->GetActivationFunction(), fActivationDesc);

      Matrix_t df_m = df.GetMatrix();

      // Architecture_t::PrintTensor(df, "dy before");
      if (t > 1) {
         Matrix_t precStateActivations = arr_output.At(t - 2).GetMatrix();
         CellBackward(state_gradients_backward, precStateActivations, actbw_m, gradbw_m, df_m);

      } else {
         const Matrix_t &precStateActivations = initState;
         CellBackward(state_gradients_backward, precStateActivations, actbw_m, gradbw_m, df_m);

      }
   }
   if (!dummy) {
      Architecture_t::Rearrange(gradients_backward, arr_gradients_backward);
   }
}

//______________________________________________________________________________
template <typename Architecture_t>
auto inline TBasicRNNLayer<Architecture_t>::CellBackward(Matrix_t & state_gradients_backward,
                                                         const Matrix_t & precStateActivations,
                                                         const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF)
-> Matrix_t &
{
   return Architecture_t::RecurrentLayerBackward(state_gradients_backward, fWeightInputGradients, fWeightStateGradients,
                                                 fBiasGradients, dF, precStateActivations, fWeightsInput,
                                                 fWeightsState, input, input_gradient);
}
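
// What RecurrentLayerBackward computes (schematically; the exact formulas live
// in the architecture backends, so this is an inferred summary): with dF the
// gradient w.r.t. the pre-activation at this time step,
//   fBiasGradients        += column sums of dF
//   fWeightInputGradients += dF^T . input
//   fWeightStateGradients += dF^T . precStateActivations
//   input_gradient         = dF . fWeightsInput
//   state_gradients_backward (returned) = dF . fWeightsState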

//______________________________________________________________________________
template <typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::AddWeightsXMLTo(void *parent) -> void
{
   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "RNNLayer");

   // write all other info like stateSize, inputSize, timeSteps, rememberState
   gTools().xmlengine().NewAttr(layerxml, 0, "StateSize", gTools().StringFromInt(this->GetStateSize()));
   gTools().xmlengine().NewAttr(layerxml, 0, "InputSize", gTools().StringFromInt(this->GetInputSize()));
   gTools().xmlengine().NewAttr(layerxml, 0, "TimeSteps", gTools().StringFromInt(this->GetTimeSteps()));
   gTools().xmlengine().NewAttr(layerxml, 0, "RememberState", gTools().StringFromInt(this->DoesRememberState()));
   gTools().xmlengine().NewAttr(layerxml, 0, "ReturnSequence", gTools().StringFromInt(this->DoesReturnSequence()));

   // write weights and bias matrices
   this->WriteMatrixToXML(layerxml, "InputWeights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "StateWeights", this->GetWeightsAt(1));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));

}
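
// The resulting XML node looks schematically like this (illustrative values;
// the exact matrix encoding is defined by WriteMatrixToXML):
//
//   <RNNLayer StateSize="12" InputSize="8" TimeSteps="5"
//             RememberState="0" ReturnSequence="0">
//      <InputWeights> ... </InputWeights>
//      <StateWeights> ... </StateWeights>
//      <Biases> ... </Biases>
//   </RNNLayer>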

//______________________________________________________________________________
template <typename Architecture_t>
auto TBasicRNNLayer<Architecture_t>::ReadWeightsFromXML(void *parent) -> void
{
   // Read weights and biases
   this->ReadMatrixXML(parent, "InputWeights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "StateWeights", this->GetWeightsAt(1));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));

}

} // namespace RNN
} // namespace DNN
} // namespace TMVA

#endif