#ifndef TMVA_DNN_GENERALLAYER
#define TMVA_DNN_GENERALLAYER
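
// Headers used below: standard library streams and containers, the TMVA tools
// (gTools(), XML engine), R__ASSERT, TMatrixT and the DNN initialization
// helpers (EInitialization, initialize<>).
#include <limits>
#include <sstream>
#include <string>
#include <vector>

#include "TError.h" // R__ASSERT
#include "TMatrixT.h"
#include "TMVA/Tools.h"
#include "TMVA/DNN/Functions.h"

namespace TMVA {
namespace DNN {

/** \class VGeneralLayer
    Generic layer base class of the TMVA Deep Learning module. It owns the
    weight and bias matrices, their gradients, and the output and activation
    gradient tensors shared by all concrete layer implementations.
*/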
template <typename Architecture_t>
class VGeneralLayer {

   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

protected:
   // ... data members (fBatchSize, input and layer dimensions, fIsTraining,
   // fWeights, fBiases, fWeightGradients, fBiasGradients, fOutput,
   // fActivationGradients, fInit) ...

public:
   /*! General constructor: every weight (bias) matrix has the same
       WeightsNRows x WeightsNCols (BiasesNRows x BiasesNCols) shape. */
   VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
                 size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols,
                 size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices,
                 size_t OutputNRows, size_t OutputNCols, EInitialization Init);
   /*! General constructor: the individual weight and bias matrices may have
       different shapes, given per slice through the row and column vectors. */
   VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
                 size_t Height, size_t Width, size_t WeightsNSlices, std::vector<size_t> WeightsNRows,
                 std::vector<size_t> WeightsNCols, size_t BiasesNSlices, std::vector<size_t> BiasesNRows,
                 std::vector<size_t> BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols,
                 EInitialization Init);
   /*! Copies the weights provided as an input. */
   void CopyWeights(const std::vector<Matrix_t> &otherWeights);

   /*! Copies the biases provided as an input. */
   void CopyBiases(const std::vector<Matrix_t> &otherBiases);
   /*! Copies the weights, biases and any extra layer parameters from a layer
       instantiated for a (possibly) different architecture. */
   template <typename Arch>
   void CopyParameters(const VGeneralLayer<Arch> &layer);

   // ... (remaining interface: Initialize, Forward, Backward, the Update*
   // methods, XML I/O helpers, getters and setters elided) ...
};
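
//
//
//  The General Layer Class - Implementation
//_________________________________________________________________________________________________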
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                             size_t depth, size_t height, size_t width, size_t weightsNSlices,
                                             size_t weightsNRows, size_t weightsNCols, size_t biasesNSlices,
                                             size_t biasesNRows, size_t biasesNCols, size_t outputNSlices,
                                             size_t outputNRows, size_t outputNCols, EInitialization init)
   : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
     fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput( outputNSlices, outputNRows, outputNCols ),
     fActivationGradients( outputNSlices, outputNRows, outputNCols ),
     fInit(init)
{
   // allocate the weight and bias matrices together with their gradients
   for (size_t i = 0; i < weightsNSlices; i++) {
      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);
   }

   for (size_t i = 0; i < biasesNSlices; i++) {
      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);
   }
}
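
// Illustrative sketch only (not taken from a concrete TMVA layer): a fully
// connected layer mapping nInputs -> nOutputs units could forward its
// dimensions to this base-class constructor roughly as
//
//    VGeneralLayer<Architecture_t>(batchSize, 1, 1, nInputs,   // input shape
//                                  1, 1, nOutputs,             // layer shape
//                                  1, nOutputs, nInputs,       // one weight matrix W
//                                  1, nOutputs, 1,             // one bias vector
//                                  1, batchSize, nOutputs,     // output tensor shape
//                                  init);
//
// The exact arguments used by the concrete layer classes may differ.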
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                             size_t depth, size_t height, size_t width, size_t weightsNSlices,
                                             std::vector<size_t> weightsNRows, std::vector<size_t> weightsNCols,
                                             size_t biasesNSlices, std::vector<size_t> biasesNRows,
                                             std::vector<size_t> biasesNCols, size_t outputNSlices, size_t outputNRows,
                                             size_t outputNCols, EInitialization init)
   : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
     fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput( outputNSlices, outputNRows, outputNCols ),
     fActivationGradients( outputNSlices, outputNRows, outputNCols ),
     fInit(init)
{
   // allocate weight and bias matrices whose shapes may differ from slice to slice
   for (size_t i = 0; i < weightsNSlices; i++) {
      fWeights.emplace_back(weightsNRows[i], weightsNCols[i]);
      fWeightGradients.emplace_back(weightsNRows[i], weightsNCols[i]);
   }

   for (size_t i = 0; i < biasesNSlices; i++) {
      fBiases.emplace_back(biasesNRows[i], biasesNCols[i]);
      fBiasGradients.emplace_back(biasesNRows[i], biasesNCols[i]);
   }
}
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(VGeneralLayer<Architecture_t> *layer)
   : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()),
     fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()),
     fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
     fBiasGradients(),
     fOutput( layer->GetOutput().GetShape() ),
     fActivationGradients( layer->GetActivationGradients().GetShape() ),
     fInit(layer->GetInitialization() )
{
   // allocate weights, biases and gradients with the shapes of the other layer
   // and copy its current weight and bias values
   size_t weightsNSlices = (layer->GetWeights()).size();
   size_t weightsNRows = 0;
   size_t weightsNCols = 0;

   for (size_t i = 0; i < weightsNSlices; i++) {
      weightsNRows = (layer->GetWeights()[i]).GetNrows();
      weightsNCols = (layer->GetWeights()[i]).GetNcols();

      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);

      Architecture_t::Copy(fWeights[i], layer->GetWeights()[i]);
   }

   size_t biasesNSlices = (layer->GetBiases()).size();
   size_t biasesNRows = 0;
   size_t biasesNCols = 0;

   for (size_t i = 0; i < biasesNSlices; i++) {
      biasesNRows = (layer->GetBiases()[i]).GetNrows();
      biasesNCols = (layer->GetBiases()[i]).GetNcols();

      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);

      Architecture_t::Copy(fBiases[i], layer->GetBiases()[i]);
   }
}
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(const VGeneralLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight),
     fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth),
     fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput( layer.GetOutput() ),
     fActivationGradients( layer.GetActivationGradients() ),
     fInit( layer.GetInitialization())
{
   // copy constructor: allocate weights, biases and gradients with the shapes
   // of the other layer and copy the weight and bias values
   size_t weightsNSlices = layer.fWeights.size();
   size_t weightsNRows = 0;
   size_t weightsNCols = 0;

   for (size_t i = 0; i < weightsNSlices; i++) {
      weightsNRows = (layer.fWeights[i]).GetNrows();
      weightsNCols = (layer.fWeights[i]).GetNcols();

      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);

      Architecture_t::Copy(fWeights[i], layer.fWeights[i]);
   }

   size_t biasesNSlices = layer.fBiases.size();
   size_t biasesNRows = 0;
   size_t biasesNCols = 0;

   for (size_t i = 0; i < biasesNSlices; i++) {
      biasesNRows = (layer.fBiases[i]).GetNrows();
      biasesNCols = (layer.fBiases[i]).GetNcols();

      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);

      Architecture_t::Copy(fBiases[i], layer.fBiases[i]);
   }

   size_t outputNSlices = layer.fOutput.size();
   size_t outputNRows = 0;
   size_t outputNCols = 0;

   for (size_t i = 0; i < outputNSlices; i++) {
      outputNRows = (layer.fOutput[i]).GetNrows();
      outputNCols = (layer.fOutput[i]).GetNcols();

      fOutput.emplace_back(outputNRows, outputNCols);
   }
}
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::~VGeneralLayer()
{
   // Nothing to do here.
}
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::Initialize() -> void
{
   // initialize the weights according to the layer's initialization method;
   // biases and all gradients start at zero
   for (size_t i = 0; i < fWeights.size(); i++) {
      initialize<Architecture_t>(fWeights[i], this->GetInitialization());
      initialize<Architecture_t>(fWeightGradients[i], EInitialization::kZero);
   }

   for (size_t i = 0; i < fBiases.size(); i++) {
      initialize<Architecture_t>(fBiases[i], EInitialization::kZero);
      initialize<Architecture_t>(fBiasGradients[i], EInitialization::kZero);
   }
}
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::Update(const Scalar_t learningRate) -> void
{
   // apply one gradient-descent step using the gradients stored in the layer
   this->UpdateWeights(fWeightGradients, learningRate);
   this->UpdateBiases(fBiasGradients, learningRate);
}
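
// The Update* methods below all perform the same in-place operation,
// X <- X + (-learningRate) * G, through Architecture_t::ScaleAdd: a plain
// gradient-descent step applied to the weights, the biases, or to the stored
// gradient matrices.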
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateWeights(const std::vector<Matrix_t> &weightGradients,
                                                  const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fWeights.size(); i++) {
      Architecture_t::ScaleAdd(fWeights[i], weightGradients[i], -learningRate);
   }
}
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateBiases(const std::vector<Matrix_t> &biasGradients,
                                                 const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fBiases.size(); i++) {
      Architecture_t::ScaleAdd(fBiases[i], biasGradients[i], -learningRate);
   }
}
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients,
                                                          const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fWeightGradients.size(); i++) {
      Architecture_t::ScaleAdd(fWeightGradients[i], weightGradients[i], -learningRate);
   }
}
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients,
                                                        const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fBiasGradients.size(); i++) {
      Architecture_t::ScaleAdd(fBiasGradients[i], biasGradients[i], -learningRate);
   }
}
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::CopyWeights(const std::vector<Matrix_t> &otherWeights) -> void
{
   for (size_t i = 0; i < fWeights.size(); i++) {
      Architecture_t::Copy(fWeights[i], otherWeights[i]);
   }
}
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::CopyBiases(const std::vector<Matrix_t> &otherBiases) -> void
{
   for (size_t i = 0; i < fBiases.size(); i++) {
      Architecture_t::Copy(fBiases[i], otherBiases[i]);
   }
}
template <typename Architecture_t>
template <typename Arch>
void VGeneralLayer<Architecture_t>::CopyParameters(const VGeneralLayer<Arch> &layer)
{
   // copy weights and biases from a layer of a (possibly) different
   // architecture - generic implementation
   Architecture_t::CopyDiffArch(this->GetWeights(), layer.GetWeights());
   Architecture_t::CopyDiffArch(this->GetBiases(), layer.GetBiases());

   // copy the additional layer parameters, if any
   auto params = layer.GetExtraLayerParameters();
   if (params.size() > 0) {
      auto paramsToCopy = GetExtraLayerParameters();
      Architecture_t::CopyDiffArch(paramsToCopy, params);
      SetExtraLayerParameters(paramsToCopy);
   }
}
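
// Helpers for writing the layer parameters to, and reading them back from, the
// TMVA weight-file XML. A tensor (vector of matrices) or a single matrix is
// stored as one XML node carrying shape attributes and the values as raw text.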
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::WriteTensorToXML(void *node, const char *name, const std::vector<Matrix_t> &tensor) -> void
{
   auto &xmlengine = gTools().xmlengine();
   void *matnode = xmlengine.NewChild(node, 0, name);
   if (tensor.size() == 0) return;
   xmlengine.NewAttr(matnode, 0, "Depth", gTools().StringFromInt(tensor.size()));
   // assume the same number of rows and columns for every matrix in the tensor
   xmlengine.NewAttr(matnode, 0, "Rows", gTools().StringFromInt(tensor[0].GetNrows()));
   xmlengine.NewAttr(matnode, 0, "Columns", gTools().StringFromInt(tensor[0].GetNcols()));
   std::stringstream s;
   for (size_t i = 0; i < tensor.size(); ++i) {
      auto &mat = tensor[i];
      for (Int_t row = 0; row < mat.GetNrows(); row++) {
         for (Int_t col = 0; col < mat.GetNcols(); col++) {
            s << std::scientific << mat(row, col) << " ";
         }
      }
   }
   xmlengine.AddRawLine(matnode, s.str().c_str());
}
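
// The node written above looks roughly like (illustrative):
//   <name Depth="d" Rows="r" Columns="c"> v00 v01 ... </name>
// WriteMatrixToXML below stores a single matrix in the same way, with only the
// Rows and Columns attributes.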
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix) -> void
{
   auto &xmlengine = gTools().xmlengine();
   void *matnode = xmlengine.NewChild(node, 0, name);

   xmlengine.NewAttr(matnode, 0, "Rows", gTools().StringFromInt(matrix.GetNrows()));
   xmlengine.NewAttr(matnode, 0, "Columns", gTools().StringFromInt(matrix.GetNcols()));

   std::stringstream s;
   s.precision( std::numeric_limits<Scalar_t>::digits10 );
   size_t nrows = matrix.GetNrows();
   size_t ncols = matrix.GetNcols();
   for (size_t row = 0; row < nrows; row++) {
      for (size_t col = 0; col < ncols; col++) {
         s << std::scientific << matrix(row, col) << " ";
      }
   }
   xmlengine.AddRawLine(matnode, s.str().c_str());
}
// read a single matrix written by WriteMatrixToXML back from the weight file
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::ReadMatrixXML(void *node, const char *name, Matrix_t &matrix) -> void
{
   void *matrixXML = gTools().GetChild(node, name);
   size_t rows, cols;
   gTools().ReadAttr(matrixXML, "Rows", rows);
   gTools().ReadAttr(matrixXML, "Columns", cols);

   R__ASSERT((size_t) matrix.GetNrows() == rows);
   R__ASSERT((size_t) matrix.GetNcols() == cols);

   TMatrixT<Scalar_t> tmatrix(rows, cols);
   const char *matrixString = gTools().xmlengine().GetNodeContent(matrixXML);
   std::stringstream matrixStringStream(matrixString);

   for (size_t i = 0; i < rows; i++) {
      for (size_t j = 0; j < cols; j++) {
#ifndef R__HAS_TMVAGPU
         matrixStringStream >> tmatrix(i, j);
#else
         Scalar_t value;
         matrixStringStream >> value;
         tmatrix(i, j) = value;
#endif
      }
   }

   // copy the parsed values into the architecture-specific matrix
   Matrix_t tmp(tmatrix);
   Architecture_t::Copy(matrix, tmp);
}
// utility function to print a tensor while debugging
template <typename Architecture>
auto debugTensor(const typename Architecture::Tensor_t &A, const std::string name = "tensor") -> void
{
   Architecture::PrintTensor(A, name);
}
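
} // namespace DNN
} // namespace TMVA

#endif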