Logo ROOT  
Reference Guide
RNNLayer.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn/rnn:$Id$
2// Author: Saurav Shekhar 19/07/17
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : BasicRNNLayer *
8 * *
9 * Description: *
10 * NeuralNetwork *
11 * *
12 * Authors (alphabetical): *
13 * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
14 * *
15 * Copyright (c) 2005-2015: *
16 * All rights reserved. *
17 * CERN, Switzerland *
18 * *
19 * For the licensing terms see $ROOTSYS/LICENSE. *
20 * For the list of contributors see $ROOTSYS/README/CREDITS. *
21 **********************************************************************************/
22
23//#pragma once
24
25//////////////////////////////////////////////////////////////////////
26// <Description> //
27//////////////////////////////////////////////////////////////////////
28
29#ifndef TMVA_DNN_RNN_LAYER
30#define TMVA_DNN_RNN_LAYER
31
32#include <cmath>
33#include <iostream>
34#include <vector>
35#include <string>
36
37#include "TMatrix.h"
38#include "TMVA/DNN/Functions.h"
39
40namespace TMVA
41{
42namespace DNN
43{
44
45namespace RNN {
46
47//______________________________________________________________________________
48//
49// Basic RNN Layer
50//______________________________________________________________________________
51
52/** \class BasicRNNLayer
53 Generic implementation
54*/
55template<typename Architecture_t>
56 class TBasicRNNLayer : public VGeneralLayer<Architecture_t>
57{
58
59public:
60
61 using Tensor_t = typename Architecture_t::Tensor_t;
62 using Matrix_t = typename Architecture_t::Matrix_t;
63 using Scalar_t = typename Architecture_t::Scalar_t;
64
65 using LayerDescriptor_t = typename Architecture_t::RecurrentDescriptor_t;
66 using WeightsDescriptor_t = typename Architecture_t::FilterDescriptor_t;
67 using TensorDescriptor_t = typename Architecture_t::TensorDescriptor_t;
68 using HelperDescriptor_t = typename Architecture_t::DropoutDescriptor_t;
69
70 using RNNWorkspace_t = typename Architecture_t::RNNWorkspace_t;
71 using RNNDescriptors_t = typename Architecture_t::RNNDescriptors_t;
72
73private:
74
75 size_t fTimeSteps; ///< Timesteps for RNN
76 size_t fStateSize; ///< Hidden state size of RNN
77 bool fRememberState; ///< Remember state in next pass
78 bool fReturnSequence = false; ///< Return in output full sequence or just last element in time
79
80 DNN::EActivationFunction fF; ///< Activation function of the hidden state
81
82 Matrix_t fState; ///< Hidden State
83 Matrix_t &fWeightsInput; ///< Input weights, fWeights[0]
84 Matrix_t &fWeightsState; ///< Prev state weights, fWeights[1]
85 Matrix_t &fBiases; ///< Biases
86
87 Tensor_t fDerivatives; ///< First fDerivatives of the activations
88 Matrix_t &fWeightInputGradients; ///< Gradients w.r.t. the input weights
89 Matrix_t &fWeightStateGradients; ///< Gradients w.r.t. the recurring weights
90 Matrix_t &fBiasGradients; ///< Gradients w.r.t. the bias values
91
94
95 typename Architecture_t::ActivationDescriptor_t fActivationDesc;
96
97 TDescriptors *fDescriptors = nullptr; ///< Keeps all the RNN descriptors
98 TWorkspace *fWorkspace = nullptr; // workspace needed for GPU computation (CudNN)
99
100 Matrix_t fCell; ///< Empty matrix for RNN
101
102 // tensors used internally for the forward and backward pass
103 Tensor_t fX; ///< cached input tensor as T x B x I
104 Tensor_t fY; ///< cached output tensor as T x B x S
105 Tensor_t fDx; ///< cached gradient on the input (output of backward) as T x B x I
106 Tensor_t fDy; ///< cached activation gradient (input of backward) as T x B x S
107
108
109public:
110
111 /** Constructor */
112 TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize,
113 size_t timeSteps, bool rememberState = false, bool returnSequence = false,
115 bool training = true, DNN::EInitialization fA = DNN::EInitialization::kZero);
116
117 /** Copy Constructor */
119
120 /*! Destructor. */
121 virtual ~TBasicRNNLayer();
122
123 /*! Initialize the weights according to the given initialization
124 ** method. */
125 virtual void Initialize();
126
127 /*! Initialize the state
128 ** method. */
130
131 /*! Compute and return the next state with given input
132 * matrix */
133 void Forward(Tensor_t &input, bool isTraining = true);
134
135 /*! Forward for a single cell (time unit) */
136 void CellForward(const Matrix_t &input, Matrix_t & dF);
137
138 /*! Backpropagates the error. Must only be called directly at the corresponding
139 * call to Forward(...). */
140 void Backward(Tensor_t &gradients_backward,
141 const Tensor_t &activations_backward);
142
143 /* Updates weights and biases, given the learning rate */
144 void Update(const Scalar_t learningRate);
145
146 /*! Backward for a single time unit
147 * a the corresponding call to Forward(...). */
148 inline Matrix_t & CellBackward(Matrix_t & state_gradients_backward,
149 const Matrix_t & precStateActivations,
150 const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF);
151
152 /** Prints the info about the layer */
153 void Print() const;
154
155 /*! Writes the information and the weights about the layer in an XML node. */
156 virtual void AddWeightsXMLTo(void *parent);
157
158 /*! Read the information and the weights about the layer from XML node. */
159 virtual void ReadWeightsFromXML(void *parent);
160
161 void InitTensors();
162 // void InitializeDescriptors();
163 // void ReleaseDescriptors();
164 // void InitializeWorkspace();
165 // void FreeWorkspace();
166
167 /** Getters */
168 size_t GetTimeSteps() const { return fTimeSteps; }
169 size_t GetStateSize() const { return fStateSize; }
170 size_t GetInputSize() const { return this->GetInputWidth(); }
171 inline bool DoesRememberState() const {return fRememberState;}
172 inline bool DoesReturnSequence() const { return fReturnSequence; }
174 Matrix_t & GetState() {return fState;} // RNN Hidden state
175 const Matrix_t & GetState() const {return fState;}
176 Matrix_t &GetCell() { return fCell; } // this returns an empty matrixfor RNN
177 const Matrix_t &GetCell() const { return fCell; }
178
180 const Matrix_t & GetWeightsInput() const {return fWeightsInput;}
182 const Matrix_t & GetWeightsState() const {return fWeightsState;}
184 const Tensor_t & GetDerivatives() const {return fDerivatives;}
185 // Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
186 // const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }
187
189 const Matrix_t & GetBiasesState() const {return fBiases;}
196
198 const Tensor_t &GetWeightsTensor() const { return fWeightsTensor; }
201
202 Tensor_t &GetX() { return fX; }
203 Tensor_t &GetY() { return fY; }
204 Tensor_t &GetDX() { return fDx; }
205 Tensor_t &GetDY() { return fDy; }
206};
207
208//______________________________________________________________________________
209//
210// BasicRNNLayer Implementation
211//______________________________________________________________________________
212template <typename Architecture_t>
213TBasicRNNLayer<Architecture_t>::TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps,
214 bool rememberState, bool returnSequence, DNN::EActivationFunction f, bool /*training*/,
216 // TODO inputDepth and outputDepth changed to batchSize??
217 : VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, (returnSequence) ? timeSteps : 1 ,
218 stateSize, 2, {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1},
219 batchSize, (returnSequence) ? timeSteps : 1, stateSize, fA),
220 fTimeSteps(timeSteps), fStateSize(stateSize), fRememberState(rememberState), fReturnSequence(returnSequence), fF(f), fState(batchSize, stateSize),
221 fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)),
222 fBiases(this->GetBiasesAt(0)), fDerivatives(timeSteps, batchSize, stateSize), // create tensor time x bs x S
223 fWeightInputGradients(this->GetWeightGradientsAt(0)), fWeightStateGradients(this->GetWeightGradientsAt(1)),
224 fBiasGradients(this->GetBiasGradientsAt(0)), fWeightsTensor({0}), fWeightGradientsTensor({0})
225{
226 InitTensors();
227}
228
229//______________________________________________________________________________
230template <typename Architecture_t>
232 : VGeneralLayer<Architecture_t>(layer), fTimeSteps(layer.fTimeSteps), fStateSize(layer.fStateSize),
233 fRememberState(layer.fRememberState), fReturnSequence(layer.fReturnSequence), fF(layer.GetActivationFunction()),
234 fState(layer.GetBatchSize(), layer.GetStateSize()),
235 fWeightsInput(this->GetWeightsAt(0)), fWeightsState(this->GetWeightsAt(1)), fBiases(this->GetBiasesAt(0)),
236 fDerivatives(layer.GetDerivatives().GetShape()), fWeightInputGradients(this->GetWeightGradientsAt(0)),
237 fWeightStateGradients(this->GetWeightGradientsAt(1)), fBiasGradients(this->GetBiasGradientsAt(0)),
238 fWeightsTensor({0}), fWeightGradientsTensor({0})
239{
240
241 Architecture_t::Copy(fDerivatives, layer.GetDerivatives() );
242
243 // Gradient matrices not copied
244 Architecture_t::Copy(fState, layer.GetState());
245 InitTensors();
246}
247
248template <typename Architecture_t>
250{
251 if (fDescriptors) {
252 Architecture_t::ReleaseRNNDescriptors(fDescriptors);
253 delete fDescriptors;
254 }
255
256 if (fWorkspace) {
257 Architecture_t::FreeRNNWorkspace(fWorkspace);
258 delete fWorkspace;
259 }
260}
261
262//______________________________________________________________________________
263template<typename Architecture_t>
265{
266 // auto m = this->GetInitialization();
267 // DNN::initialize<Architecture_t>(fWeightsInput, m);
268 // DNN::initialize<Architecture_t>(fWeightsState, m);
269 // DNN::initialize<Architecture_t>(fBiases, DNN::EInitialization::kZero);
270
272
273 Architecture_t::InitializeRNNDescriptors(fDescriptors, this);
274 Architecture_t::InitializeRNNWorkspace(fWorkspace, fDescriptors, this);
275}
276
277//______________________________________________________________________________
278template <typename Architecture_t>
280{
281 // fix output tensor for Cudnn must be a tensor of B x T x S of right layout
282 Architecture_t::InitializeRNNTensors(this);
283}
284//______________________________________________________________________________
285template <typename Architecture_t>
287{
288 DNN::initialize<Architecture_t>(this->GetState(), DNN::EInitialization::kZero);
289
290 Architecture_t::InitializeActivationDescriptor(fActivationDesc,this->GetActivationFunction());
291}
292
293//______________________________________________________________________________
294template<typename Architecture_t>
296-> void
297{
298 std::cout << " RECURRENT Layer: \t ";
299 std::cout << " (NInput = " << this->GetInputSize(); // input size
300 std::cout << ", NState = " << this->GetStateSize(); // hidden state size
301 std::cout << ", NTime = " << this->GetTimeSteps() << " )"; // time size
302 std::cout << "\tOutput = ( " << this->GetOutput().GetFirstSize() << " , " << this->GetOutput().GetHSize() << " , " << this->GetOutput().GetWSize() << " )\n";
303}
304
/// Debug helper: dump a matrix to stdout, one row per line.
/// \param A    matrix to print
/// \param name label printed before the matrix contents
/// (name is passed by const reference — the original took a const std::string
/// by value, which forced a copy on every call)
template <typename Architecture_t>
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string &name = "matrix") -> void
{
   std::cout << name << "\n";
   for (size_t i = 0; i < A.GetNrows(); ++i) {
      for (size_t j = 0; j < A.GetNcols(); ++j) {
         std::cout << A(i, j) << " ";
      }
      std::cout << "\n";
   }
   std::cout << "********\n";
}
318
319
320//______________________________________________________________________________
321template <typename Architecture_t>
322void TBasicRNNLayer<Architecture_t>::Forward(Tensor_t &input, bool isTraining ) // B x T x D
323{
324
325
326 // for Cudnn
327 if (Architecture_t::IsCudnn()) {
328
329 Tensor_t &x = this->fX;
330 Tensor_t &y = this->fY;
331
332 Architecture_t::Rearrange(x, input);
333
334 const auto &weights = this->GetWeightsAt(0);
335 // Tensor_t cx({1}); // not used for normal RNN
336 // Tensor_t cy({1}); // not used for normal RNN
337
338 // hx is fState - tensor are of right shape
339 auto &hx = this->GetState();
340 auto &cx = this->GetCell();
341 // use same for hy and cy
342 auto &hy = this->GetState();
343 auto &cy = this->GetCell();
344
345 auto rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);
346 auto rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);
347
348 Architecture_t::RNNForward(x, hx, cx, weights, y, hy, cy, rnnDesc, rnnWork, isTraining);
349
350 if (fReturnSequence) {
351 Architecture_t::Rearrange(this->GetOutput(), y); // swap B and T from y to Output
352 }
353 else {
354 // tmp is a reference to y (full cudnn output)
355 Tensor_t tmp = (y.At(y.GetShape()[0] - 1)).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});
356 Architecture_t::Copy(this->GetOutput(), tmp);
357 }
358
359 return;
360 }
361
362 // FORWARD for CPU architecture
363 // D : input size
364 // H : state size
365 // T : time size
366 // B : batch size
367
368 Tensor_t arrInput (fTimeSteps, this->GetBatchSize(), this->GetInputWidth() );
369 //for (size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); // T x B x D
370 Architecture_t::Rearrange(arrInput, input);
371 Tensor_t arrOutput ( fTimeSteps, this->GetBatchSize(), fStateSize);
372 //for (size_t t = 0; t < fTimeSteps;++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); // T x B x H
373
374 if (!this->fRememberState) InitState(DNN::EInitialization::kZero);
375
376 for (size_t t = 0; t < fTimeSteps; ++t) {
377 Matrix_t arrInput_m = arrInput.At(t).GetMatrix();
378 Matrix_t df_m = fDerivatives.At(t).GetMatrix();
379 CellForward(arrInput_m, df_m );
380 Matrix_t arrOutput_m = arrOutput.At(t).GetMatrix();
381 Architecture_t::Copy(arrOutput_m, fState);
382 }
383
384 if (fReturnSequence)
385 Architecture_t::Rearrange(this->GetOutput(), arrOutput); // B x T x D
386 else {
387 // get T[end[]]
388
389 Tensor_t tmp = arrOutput.At(fTimeSteps - 1); // take last time step
390 // shape of tmp is for CPU (columnwise) B x D , need to reshape to make a B x D x 1
391 // and transpose it to 1 x D x B (this is how output is expected in columnmajor format)
392 tmp = tmp.Reshape({tmp.GetShape()[0], tmp.GetShape()[1], 1});
393 assert(tmp.GetSize() == this->GetOutput().GetSize());
394 assert(tmp.GetShape()[0] == this->GetOutput().GetShape()[2]); // B is last dim in output and first in tmp
395 Architecture_t::Rearrange(this->GetOutput(), tmp);
396 // keep array output
397 fY = arrOutput;
398 }
399}
400
401//______________________________________________________________________________
402template <typename Architecture_t>
404-> void
405{
406 // State = act(W_input . input + W_state . state + bias)
407 const DNN::EActivationFunction fAF = this->GetActivationFunction();
408 Matrix_t tmpState(fState.GetNrows(), fState.GetNcols());
409 Architecture_t::MultiplyTranspose(tmpState, fState, fWeightsState);
410 Architecture_t::MultiplyTranspose(fState, input, fWeightsInput);
411 Architecture_t::ScaleAdd(fState, tmpState);
412 Architecture_t::AddRowWise(fState, fBiases);
413 Tensor_t inputActivFunc(dF);
414 Tensor_t tState(fState);
415
416 // DNN::evaluateDerivative<Architecture_t>(dFt, fAF, fState);
417 // DNN::evaluate<Architecture_t>(tState, fAF);
418
419 Architecture_t::Copy(inputActivFunc, tState);
420 Architecture_t::ActivationFunctionForward(tState, fAF, fActivationDesc);
421
422}
423
424//____________________________________________________________________________
425template <typename Architecture_t>
426auto inline TBasicRNNLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, // B x T x D
427 const Tensor_t &activations_backward) -> void // B x T x D
428 // std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> &
429 // /*inp2*/) -> void
430{
431 //BACKWARD for CUDNN
432 if (Architecture_t::IsCudnn() ) {
433
434 Tensor_t &x = this->fX;
435 Tensor_t &y = this->fY;
436 Tensor_t &dx = this->fDy;
437 Tensor_t &dy = this->fDy;
438
439 // input size is stride[1] of input tensor that is B x T x inputSize
440 assert(activations_backward.GetStrides()[1] == this->GetInputSize() );
441
442 Architecture_t::Rearrange(x, activations_backward);
443
444 if (!fReturnSequence) {
445
446 //Architecture_t::InitializeZero(dy);
447 Architecture_t::InitializeZero(dy);
448
449 //Tensor_t tmp1 = y.At(y.GetShape()[0] - 1).Reshape({y.GetShape()[1], 1, y.GetShape()[2]});
450 Tensor_t tmp2 = dy.At(dy.GetShape()[0] - 1).Reshape({dy.GetShape()[1], 1, dy.GetShape()[2]});
451
452 //Architecture_t::Copy(tmp1, this->GetOutput());
453 Architecture_t::Copy(tmp2, this->GetActivationGradients());
454 }
455 else {
456 Architecture_t::Rearrange(y, this->GetOutput());
457 Architecture_t::Rearrange(dy, this->GetActivationGradients());
458 }
459
460
461
462 // for cudnn Matrix_t and Tensor_t are same type
463 const auto &weights = this->GetWeightsTensor();
464 auto &weightGradients = this->GetWeightGradientsTensor();
465 // note that cudnnRNNBackwardWeights accumulate the weight gradients.
466 // We need then to initialize the tensor to zero every time
467 Architecture_t::InitializeZero(weightGradients);
468
469 // hx is fState
470 auto &hx = this->GetState();
471 auto cx = this->GetCell();
472 // use same for hy and cy
473 auto &dhy = hx;
474 auto &dcy = cx;
475 auto &dhx = hx;
476 auto &dcx = cx;
477
478
479 auto rnnDesc = static_cast<RNNDescriptors_t &>(*fDescriptors);
480 auto rnnWork = static_cast<RNNWorkspace_t &>(*fWorkspace);
481
482 Architecture_t::RNNBackward(x, hx, cx, y, dy, dhy, dcy, weights, dx, dhx, dcx, weightGradients, rnnDesc, rnnWork);
483
484 //Architecture_t::PrintTensor(this->GetOutput(), "output after bwd");
485
486 if (gradients_backward.GetSize() != 0)
487 Architecture_t::Rearrange(gradients_backward, dx);
488
489 return;
490 }
491
492 // BACKWARD FOR CPU
493 // activations backward is input
494 // gradients_backward is activationGradients of layer before it, which is input layer
495 // currently gradient_backward is for input(x) and not for state
496 // TODO use this to change initial state??
497
498
499 bool dummy = false;
500 if (gradients_backward.GetSize() == 0) {
501 dummy = true;
502 }
503 Tensor_t arr_gradients_backward ( fTimeSteps, this->GetBatchSize(), this->GetInputSize());
504 //for (size_t t = 0; t < fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
505
506 if (!dummy) {
507 // TODO gradients_backward will be written back on the matrix
508 //Architecture_t::Rearrange(arr_gradients_backward, gradients_backward);
509 }
510 Tensor_t arr_activations_backward ( fTimeSteps, this->GetBatchSize(), this->GetInputSize());
511 //for (size_t t = 0; t < fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); // T x B x D
512 Architecture_t::Rearrange(arr_activations_backward, activations_backward);
513
514 Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize); // B x H
515 DNN::initialize<Architecture_t>(state_gradients_backward, DNN::EInitialization::kZero);
516
517 Matrix_t initState(this->GetBatchSize(), fStateSize); // B x H
518 DNN::initialize<Architecture_t>(initState, DNN::EInitialization::kZero);
519
520 Tensor_t arr_output ( fTimeSteps, this->GetBatchSize(), fStateSize);
521 Tensor_t arr_actgradients(fTimeSteps, this->GetBatchSize(), fStateSize);
522
523 if (fReturnSequence) {
524 Architecture_t::Rearrange(arr_output, this->GetOutput());
525 Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());
526 } else {
527 //
528 arr_output = fY;
529
530 Architecture_t::InitializeZero(arr_actgradients);
531 // need to reshape to pad a time dimension = 1 (note here is columnmajor tensors)
532 Tensor_t tmp_grad = arr_actgradients.At(fTimeSteps - 1).Reshape({this->GetBatchSize(), fStateSize, 1});
533 assert(tmp_grad.GetSize() == this->GetActivationGradients().GetSize());
534 assert(tmp_grad.GetShape()[0] ==
535 this->GetActivationGradients().GetShape()[2]); // B in tmp is [0] and [2] in input act. gradients
536
537 Architecture_t::Rearrange(tmp_grad, this->GetActivationGradients());
538 }
539
540 // reinitialize weights and biases gradients to 0
541 fWeightInputGradients.Zero();
542 fWeightStateGradients.Zero();
543 fBiasGradients.Zero();
544
545 for (size_t t = fTimeSteps; t > 0; t--) {
546 //const Matrix_t & currStateActivations = arr_output[t - 1];
547 Matrix_t actgrad_m = arr_actgradients.At(t - 1).GetMatrix();
548 Architecture_t::ScaleAdd(state_gradients_backward, actgrad_m);
549
550 Matrix_t actbw_m = arr_activations_backward.At(t - 1).GetMatrix();
551 Matrix_t gradbw_m = arr_gradients_backward.At(t - 1).GetMatrix();
552
553 // compute derivatives of activations
554 Tensor_t df = fDerivatives.At(t-1);
555 Tensor_t dy = Tensor_t(state_gradients_backward);
556 //Tensor_t dy = arr_actgradients.At(t - 1);
557 Tensor_t y = arr_output.At(t-1);
558 Architecture_t::ActivationFunctionBackward(df, y,
559 dy, df, //do in place (should work)
560 this->GetActivationFunction(), fActivationDesc);
561
562 Matrix_t df_m = df.GetMatrix();
563
564 // Architecture_t::PrintTensor(df, "dy before");
565 if (t > 1) {
566 Matrix_t precStateActivations = arr_output.At(t - 2).GetMatrix();
567 CellBackward(state_gradients_backward, precStateActivations, actbw_m, gradbw_m, df_m);
568
569 } else {
570 const Matrix_t & precStateActivations = initState;
571 CellBackward(state_gradients_backward, precStateActivations, actbw_m, gradbw_m, df_m);
572
573 }
574 }
575 if (!dummy) {
576 Architecture_t::Rearrange(gradients_backward, arr_gradients_backward );
577 }
578}
579
580//______________________________________________________________________________
581template <typename Architecture_t>
582auto inline TBasicRNNLayer<Architecture_t>::CellBackward(Matrix_t & state_gradients_backward,
583 const Matrix_t & precStateActivations,
584 const Matrix_t & input, Matrix_t & input_gradient, Matrix_t &dF)
585-> Matrix_t &
586{
587 return Architecture_t::RecurrentLayerBackward(state_gradients_backward, fWeightInputGradients, fWeightStateGradients,
588 fBiasGradients, dF, precStateActivations, fWeightsInput,
589 fWeightsState, input, input_gradient);
590}
591
592//______________________________________________________________________________
593template <typename Architecture_t>
595{
596 auto layerxml = gTools().xmlengine().NewChild(parent, 0, "RNNLayer");
597
598 // write All other info like stateSize, inputSize, timeSteps,rememberState
599 gTools().xmlengine().NewAttr(layerxml, 0, "StateSize", gTools().StringFromInt(this->GetStateSize()));
600 gTools().xmlengine().NewAttr(layerxml, 0, "InputSize", gTools().StringFromInt(this->GetInputSize()));
601 gTools().xmlengine().NewAttr(layerxml, 0, "TimeSteps", gTools().StringFromInt(this->GetTimeSteps()));
602 gTools().xmlengine().NewAttr(layerxml, 0, "RememberState", gTools().StringFromInt(this->DoesRememberState()));
603 gTools().xmlengine().NewAttr(layerxml, 0, "ReturnSequence", gTools().StringFromInt(this->DoesReturnSequence()));
604
605 // write weights and bias matrices
606 this->WriteMatrixToXML(layerxml, "InputWeights", this -> GetWeightsAt(0));
607 this->WriteMatrixToXML(layerxml, "StateWeights", this -> GetWeightsAt(1));
608 this->WriteMatrixToXML(layerxml, "Biases", this -> GetBiasesAt(0));
609
610
611}
612
613//______________________________________________________________________________
614template <typename Architecture_t>
616{
617 // Read weights and biases
618 this->ReadMatrixXML(parent,"InputWeights", this -> GetWeightsAt(0));
619 this->ReadMatrixXML(parent,"StateWeights", this -> GetWeightsAt(1));
620 this->ReadMatrixXML(parent,"Biases", this -> GetBiasesAt(0));
621
622}
623
624} // namespace RNN
625} // namespace DNN
626} // namespace TMVA
627
628#endif
#define f(i)
Definition: RSha256.hxx:104
char name[80]
Definition: TGX11.cxx:110
Tensor_t fDy
cached activation gradient (input of backward) as T x B x S
Definition: RNNLayer.h:106
size_t GetStateSize() const
Definition: RNNLayer.h:169
typename Architecture_t::RNNDescriptors_t RNNDescriptors_t
Definition: RNNLayer.h:71
DNN::EActivationFunction GetActivationFunction() const
Definition: RNNLayer.h:173
void InitState(DNN::EInitialization m=DNN::EInitialization::kZero)
Initialize the state method.
Definition: RNNLayer.h:286
const Matrix_t & GetWeightInputGradients() const
Definition: RNNLayer.h:193
const Tensor_t & GetWeightGradientsTensor() const
Definition: RNNLayer.h:200
void Print() const
Prints the info about the layer.
Definition: RNNLayer.h:295
typename Architecture_t::RecurrentDescriptor_t LayerDescriptor_t
Definition: RNNLayer.h:65
Tensor_t fY
cached output tensor as T x B x S
Definition: RNNLayer.h:104
Tensor_t fDerivatives
First fDerivatives of the activations.
Definition: RNNLayer.h:87
const Matrix_t & GetWeightStateGradients() const
Definition: RNNLayer.h:195
Matrix_t & fWeightsInput
Input weights, fWeights[0].
Definition: RNNLayer.h:83
Matrix_t & fWeightsState
Prev state weights, fWeights[1].
Definition: RNNLayer.h:84
virtual ~TBasicRNNLayer()
Destructor.
Definition: RNNLayer.h:249
TDescriptors * fDescriptors
Keeps all the RNN descriptors.
Definition: RNNLayer.h:97
Tensor_t fX
cached input tensor as T x B x I
Definition: RNNLayer.h:103
void Forward(Tensor_t &input, bool isTraining=true)
Compute and return the next state with given input matrix.
Definition: RNNLayer.h:322
Matrix_t & fBiases
Biases.
Definition: RNNLayer.h:85
Architecture_t::ActivationDescriptor_t fActivationDesc
Definition: RNNLayer.h:95
virtual void ReadWeightsFromXML(void *parent)
Read the information and the weights about the layer from XML node.
Definition: RNNLayer.h:615
typename Architecture_t::TensorDescriptor_t TensorDescriptor_t
Definition: RNNLayer.h:67
bool fReturnSequence
Return in output full sequence or just last element in time.
Definition: RNNLayer.h:78
const Tensor_t & GetWeightsTensor() const
Definition: RNNLayer.h:198
Matrix_t & GetBiasStateGradients()
Definition: RNNLayer.h:190
size_t fStateSize
Hidden state size of RNN.
Definition: RNNLayer.h:76
const Matrix_t & GetState() const
Definition: RNNLayer.h:175
void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward)
Backpropagates the error.
Definition: RNNLayer.h:426
const Matrix_t & GetCell() const
Definition: RNNLayer.h:177
Matrix_t & CellBackward(Matrix_t &state_gradients_backward, const Matrix_t &precStateActivations, const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF)
Backward for a single time unit a the corresponding call to Forward(...).
Definition: RNNLayer.h:582
typename Architecture_t::Matrix_t Matrix_t
Definition: RNNLayer.h:62
typename Architecture_t::DropoutDescriptor_t HelperDescriptor_t
Definition: RNNLayer.h:68
typename Architecture_t::RNNWorkspace_t RNNWorkspace_t
Definition: RNNLayer.h:70
Matrix_t fState
Hidden State.
Definition: RNNLayer.h:82
Matrix_t & fWeightInputGradients
Gradients w.r.t. the input weights.
Definition: RNNLayer.h:88
DNN::EActivationFunction fF
Activation function of the hidden state.
Definition: RNNLayer.h:80
TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, bool returnSequence=false, DNN::EActivationFunction f=DNN::EActivationFunction::kTanh, bool training=true, DNN::EInitialization fA=DNN::EInitialization::kZero)
Constructor.
Definition: RNNLayer.h:213
Tensor_t & GetWeightGradientsTensor()
Definition: RNNLayer.h:199
size_t GetTimeSteps() const
Getters.
Definition: RNNLayer.h:168
bool fRememberState
Remember state in next pass.
Definition: RNNLayer.h:77
Matrix_t & fWeightStateGradients
Gradients w.r.t. the recurring weights.
Definition: RNNLayer.h:89
Matrix_t & GetWeightInputGradients()
Definition: RNNLayer.h:192
const Matrix_t & GetBiasesState() const
Definition: RNNLayer.h:189
void Update(const Scalar_t learningRate)
virtual void AddWeightsXMLTo(void *parent)
Writes the information and the weights about the layer in an XML node.
Definition: RNNLayer.h:594
size_t fTimeSteps
Timesteps for RNN.
Definition: RNNLayer.h:75
bool DoesRememberState() const
Definition: RNNLayer.h:171
void CellForward(const Matrix_t &input, Matrix_t &dF)
Forward for a single cell (time unit)
Definition: RNNLayer.h:403
Tensor_t fDx
cached gradient on the input (output of backward) as T x B x I
Definition: RNNLayer.h:105
const Matrix_t & GetBiasStateGradients() const
Definition: RNNLayer.h:191
size_t GetInputSize() const
Definition: RNNLayer.h:170
Matrix_t & GetWeightStateGradients()
Definition: RNNLayer.h:194
bool DoesReturnSequence() const
Definition: RNNLayer.h:172
Matrix_t & fBiasGradients
Gradients w.r.t. the bias values.
Definition: RNNLayer.h:90
const Matrix_t & GetWeightsInput() const
Definition: RNNLayer.h:180
Matrix_t fCell
Empty matrix for RNN.
Definition: RNNLayer.h:100
const Tensor_t & GetDerivatives() const
Definition: RNNLayer.h:184
virtual void Initialize()
Initialize the weights according to the given initialization method.
Definition: RNNLayer.h:264
const Matrix_t & GetWeightsState() const
Definition: RNNLayer.h:182
typename Architecture_t::FilterDescriptor_t WeightsDescriptor_t
Definition: RNNLayer.h:66
Generic General Layer class.
Definition: GeneralLayer.h:51
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:395
typename Architecture_t::Scalar_t Scalar_t
Definition: GeneralLayer.h:55
typename Architecture_t::Tensor_t Tensor_t
Definition: GeneralLayer.h:53
size_t GetInputWidth() const
Definition: GeneralLayer.h:166
TXMLEngine & xmlengine()
Definition: Tools.h:268
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:715
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
Definition: TXMLEngine.cxx:586
Double_t y[n]
Definition: legend1.C:17
Double_t x[n]
Definition: legend1.C:17
static double A[]
void Copy(void *source, void *dest)
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name="matrix") -> void
Definition: RNNLayer.h:306
EInitialization
Definition: Functions.h:72
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:32
create variable transformations
Tools & gTools()
auto * m
Definition: textangle.C:8