29#ifndef TMVA_DNN_RNN_LAYER 
   30#define TMVA_DNN_RNN_LAYER 
   54template<
typename Architecture_t>
 
   60   using Matrix_t = 
typename Architecture_t::Matrix_t;
 
   61   using Scalar_t = 
typename Architecture_t::Scalar_t;
 
   85   TBasicRNNLayer(
size_t batchSize, 
size_t stateSize, 
size_t inputSize,
 
   86                  size_t timeSteps, 
bool rememberState = 
false,
 
  111                 const Tensor_t &activations_backward,
 
  112                 std::vector<Matrix_t> &inp1,
 
  113                 std::vector<Matrix_t> &inp2);
 
  121                              const Matrix_t & precStateActivations,
 
  164template <
typename Architecture_t>
 
  169   : 
VGeneralLayer<Architecture_t>(batchSize, 1, timeSteps, inputSize, 1, timeSteps, stateSize, 2,
 
  170                                   {stateSize, stateSize}, {inputSize, stateSize}, 1, {stateSize}, {1}, batchSize,
 
  171                                   timeSteps, stateSize, fA),
 
  172     fTimeSteps(timeSteps),
 
  173     fStateSize(stateSize),
 
  174     fRememberState(rememberState),
 
  176     fState(batchSize, stateSize),
 
  177     fWeightsInput(this->GetWeightsAt(0)),
 
  178     fWeightsState(this->GetWeightsAt(1)),
 
  179     fBiases(this->GetBiasesAt(0)),
 
  180     fWeightInputGradients(this->GetWeightGradientsAt(0)),
 
  181     fWeightStateGradients(this->GetWeightGradientsAt(1)),
 
  182     fBiasGradients(this->GetBiasGradientsAt(0))
 
  184  for (
size_t i = 0; i < timeSteps; ++i) {
 
  185     fDerivatives.emplace_back(batchSize, stateSize);
 
  191template <
typename Architecture_t>
 
  193   : 
VGeneralLayer<Architecture_t>(layer), fTimeSteps(layer.fTimeSteps), fStateSize(layer.fStateSize),
 
  194     fRememberState(layer.fRememberState), fF(layer.GetActivationFunction()),
 
  195     fState(layer.GetBatchSize(), layer.GetStateSize()), fWeightsInput(this->GetWeightsAt(0)),
 
  196     fWeightsState(this->GetWeightsAt(1)), fBiases(this->GetBiasesAt(0)),
 
  197     fDerivatives(), fWeightInputGradients(this->GetWeightGradientsAt(0)),
 
  198     fWeightStateGradients(this->GetWeightGradientsAt(1)), fBiasGradients(this->GetBiasGradientsAt(0))
 
  219template <
typename Architecture_t>
 
  226template<
typename Architecture_t>
 
  230   std::cout << 
" RECURRENT Layer: \t ";
 
  231   std::cout << 
" (NInput = " << this->GetInputSize();  
 
  232   std::cout << 
", NState = " << this->GetStateSize();  
 
  233   std::cout << 
", NTime  = " << this->GetTimeSteps() << 
" )";  
 
  234   std::cout << 
"\tOutput = ( " << this->GetOutput().size() << 
" , " << this->GetOutput()[0].GetNrows() << 
" , " << this->GetOutput()[0].GetNcols() << 
" )\n";
 
  237template <
typename Architecture_t>
 
  238auto debugMatrix(
const typename Architecture_t::Matrix_t &
A, 
const std::string 
name = 
"matrix")
 
  241  std::cout << 
name << 
"\n";
 
  242  for (
size_t i = 0; i < 
A.GetNrows(); ++i) {
 
  243    for (
size_t j = 0; j < 
A.GetNcols(); ++j) {
 
  244        std::cout << 
A(i, j) << 
" ";
 
  248  std::cout << 
"********\n";
 
  253template <
typename Architecture_t>
 
  263   for (
size_t t = 0; t < fTimeSteps; ++t) arrInput.emplace_back(this->GetBatchSize(), this->GetInputWidth()); 
 
  264   Architecture_t::Rearrange(arrInput, input);
 
  266   for (
size_t t = 0; t < fTimeSteps;++t) arrOutput.emplace_back(this->GetBatchSize(), fStateSize); 
 
  269   for (
size_t t = 0; t < fTimeSteps; ++t) {
 
  270      CellForward(arrInput[t], fDerivatives[t]);
 
  273   Architecture_t::Rearrange(this->GetOutput(), arrOutput);  
 
  277template <
typename Architecture_t>
 
  283   Matrix_t tmpState(fState.GetNrows(), fState.GetNcols());
 
  284   Architecture_t::MultiplyTranspose(tmpState, fState, fWeightsState);
 
  285   Architecture_t::MultiplyTranspose(fState, input, fWeightsInput);
 
  286   Architecture_t::ScaleAdd(fState, tmpState);
 
  287   Architecture_t::AddRowWise(fState, fBiases);
 
  288   DNN::evaluateDerivative<Architecture_t>(dF, fAF, fState);
 
  289   DNN::evaluate<Architecture_t>(fState, fAF);
 
  293template <
typename Architecture_t>
 
  295                                                     const Tensor_t &activations_backward, 
 
  296                                                     std::vector<Matrix_t> & , std::vector<Matrix_t> &
 
  306  if (gradients_backward.size() == 0 || gradients_backward[0].GetNrows() == 0 || gradients_backward[0].GetNcols() == 0) {
 
  310  for (
size_t t = 0; t < fTimeSteps; ++t) arr_gradients_backward.emplace_back(this->GetBatchSize(), this->GetInputSize()); 
 
  317  for (
size_t t = 0; t < fTimeSteps; ++t) arr_activations_backward.emplace_back(this->GetBatchSize(), this->GetInputSize());  
 
  318  Architecture_t::Rearrange(arr_activations_backward, activations_backward);
 
  320   Matrix_t state_gradients_backward(this->GetBatchSize(), fStateSize);  
 
  323   Matrix_t initState(this->GetBatchSize(), fStateSize);  
 
  327   for (
size_t t = 0; t < fTimeSteps; ++t) arr_output.emplace_back(this->GetBatchSize(), fStateSize);
 
  328   Architecture_t::Rearrange(arr_output, this->GetOutput());
 
  331   for (
size_t t = 0; t < fTimeSteps; ++t) arr_actgradients.emplace_back(this->GetBatchSize(), fStateSize);
 
  332   Architecture_t::Rearrange(arr_actgradients, this->GetActivationGradients());
 
  335   fWeightInputGradients.Zero();
 
  336   fWeightStateGradients.Zero();
 
  337   fBiasGradients.Zero(); 
 
  339   for (
size_t t = fTimeSteps; t > 0; t--) {
 
  341      Architecture_t::ScaleAdd(state_gradients_backward, arr_actgradients[t - 1]);
 
  343         const Matrix_t & precStateActivations = arr_output[t - 2];
 
  344         CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
 
  345               arr_gradients_backward[t - 1], fDerivatives[t - 1]);
 
  347         const Matrix_t & precStateActivations = initState;
 
  348         CellBackward(state_gradients_backward, precStateActivations, arr_activations_backward[t - 1],
 
  349               arr_gradients_backward[t - 1], fDerivatives[t - 1]);
 
  353      Architecture_t::Rearrange(gradients_backward, arr_gradients_backward );
 
  359template <
typename Architecture_t>
 
  361                                                     const Matrix_t & precStateActivations,
 
  365   return Architecture_t::RecurrentLayerBackward(state_gradients_backward, fWeightInputGradients, fWeightStateGradients,
 
  366                                                 fBiasGradients, dF, precStateActivations, fWeightsInput,
 
  367                                                 fWeightsState, input, input_gradient);
 
  371template <
typename Architecture_t>
 
  383   this->WriteMatrixToXML(layerxml, 
"InputWeights", 
this -> GetWeightsAt(0));
 
  384   this->WriteMatrixToXML(layerxml, 
"StateWeights", 
this -> GetWeightsAt(1));
 
  385   this->WriteMatrixToXML(layerxml, 
"Biases",  
this -> GetBiasesAt(0));
 
  391template <
typename Architecture_t>
 
  395   this->ReadMatrixXML(parent,
"InputWeights", 
this -> GetWeightsAt(0));
 
  396   this->ReadMatrixXML(parent,
"StateWeights", 
this -> GetWeightsAt(1));
 
  397   this->ReadMatrixXML(parent,
"Biases", 
this -> GetBiasesAt(0));
 
static RooMathCoreReg dummy
size_t GetStateSize() const
DNN::EActivationFunction GetActivationFunction() const
void InitState(DNN::EInitialization m=DNN::EInitialization::kZero)
Initialize the weights according to the given initialization method.
const Matrix_t & GetWeightInputGradients() const
void Print() const
Prints the info about the layer.
const Matrix_t & GetWeightStateGradients() const
Matrix_t & fWeightsInput
Input weights, fWeights[0].
Matrix_t & fWeightsState
Prev state weights, fWeights[1].
Matrix_t & fBiases
Biases.
std::vector< Matrix_t > fDerivatives
First derivatives of the activations.
virtual void ReadWeightsFromXML(void *parent)
Read the information and the weights about the layer from XML node.
Matrix_t & GetBiasStateGradients()
size_t fStateSize
Hidden state size of RNN.
Matrix_t & GetWeightsState()
const Matrix_t & GetState() const
void Forward(Tensor_t &input, bool isTraining=true)
Compute the next state from the given input tensor.
const std::vector< Matrix_t > & GetDerivatives() const
Matrix_t & CellBackward(Matrix_t &state_gradients_backward, const Matrix_t &precStateActivations, const Matrix_t &input, Matrix_t &input_gradient, Matrix_t &dF)
Backward pass for a single time unit, paired with the corresponding call to Forward(...).
TBasicRNNLayer(size_t batchSize, size_t stateSize, size_t inputSize, size_t timeSteps, bool rememberState=false, DNN::EActivationFunction f=DNN::EActivationFunction::kTanh, bool training=true, DNN::EInitialization fA=DNN::EInitialization::kZero)
Constructor.
typename Architecture_t::Matrix_t Matrix_t
Matrix_t fState
Hidden State.
Matrix_t & fWeightInputGradients
Gradients w.r.t. the input weights.
DNN::EActivationFunction fF
Activation function of the hidden state.
Matrix_t & GetDerivativesAt(size_t i)
Matrix_t & GetWeightsInput()
size_t GetTimeSteps() const
Getters.
std::vector< Matrix_t > & GetDerivatives()
bool fRememberState
Remember state in next pass.
Matrix_t & fWeightStateGradients
Gradients w.r.t. the recurring weights.
Matrix_t & GetWeightInputGradients()
const Matrix_t & GetBiasesState() const
void Update(const Scalar_t learningRate)
virtual void AddWeightsXMLTo(void *parent)
Writes the information and the weights about the layer in an XML node.
size_t fTimeSteps
Timesteps for RNN.
void CellForward(const Matrix_t &input, Matrix_t &dF)
Forward for a single cell (time unit)
std::vector< Matrix_t > Tensor_t
bool IsRememberState() const
const Matrix_t & GetBiasStateGradients() const
size_t GetInputSize() const
Matrix_t & GetWeightStateGradients()
Matrix_t & fBiasGradients
Gradients w.r.t. the bias values.
const Matrix_t & GetWeightsInput() const
Matrix_t & GetBiasesState()
const Matrix_t & GetWeightsState() const
const Matrix_t & GetDerivativesAt(size_t i) const
void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward, std::vector< Matrix_t > &inp1, std::vector< Matrix_t > &inp2)
Backpropagates the error.
Generic General Layer class.
typename Architecture_t::Scalar_t Scalar_t
size_t GetBatchSize() const
Getters.
size_t GetInputWidth() const
XMLAttrPointer_t NewAttr(XMLNodePointer_t xmlnode, XMLNsPointer_t, const char *name, const char *value)
creates new attribute for xmlnode, namespaces are not supported for attributes
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
void Copy(void *source, void *dest)
auto debugMatrix(const typename Architecture_t::Matrix_t &A, const std::string name="matrix") -> void
EActivationFunction
Enum that represents layer activation functions.
create variable transformations