DenseLayer.h
// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : TDenseLayer                                                           *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Dense Layer Class                                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland        *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"

#include <iostream>
#include <iomanip>

namespace TMVA {
namespace DNN {
/** \class TDenseLayer

Generic layer class.

This generic layer class represents a dense layer of a neural network with
a given width n and activation function f. The activation of the layer is
computed as \f$\mathbf{u} = f(\mathbf{W}\mathbf{x} + \boldsymbol{\theta})\f$,
where \f$\mathbf{W}\f$ is the weight matrix and \f$\boldsymbol{\theta}\f$ the
bias vector.

In addition to the weight and bias matrices, each layer allocates memory
for its activations and the corresponding first partial derivatives of
the activation function, as well as for the gradients of the weights and
biases.

The layer provides member functions for the forward propagation of
activations through the given layer.
*/
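// A minimal usage sketch (not part of the original header), assuming the
// reference backend TMVA::DNN::TReference<double> and the enum values defined
// in TMVA/DNN/Functions.h; the batch and width numbers are arbitrary:
//
//    using Arch_t = TMVA::DNN::TReference<double>;
//    TDenseLayer<Arch_t> layer(/*BatchSize=*/32, /*InputWidth=*/16, /*Width=*/8,
//                              EInitialization::kGauss, /*DropoutProbability=*/1.0,
//                              EActivationFunction::kRelu, ERegularization::kNone,
//                              /*weightDecay=*/0.0);
//    std::vector<Arch_t::Matrix_t> input;
//    input.emplace_back(32, 16);                   // one batch: 32 events x 16 features
//    layer.Forward(input, /*applyDropout=*/false); // activations land in layer.GetOutputAt(0)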
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:
   std::vector<Matrix_t> fDerivatives; ///< First derivatives of the activations of this layer.

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
   Scalar_t fWeightDecay;  ///< The weight decay.

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute the activation of the layer for the given input. The input
    * must be in 3D tensor form, with the different matrices corresponding to
    * different events in the batch. Computes the activations as well as
    * the first partial derivative of the activation function at those
    * activations. */
   void Forward(std::vector<Matrix_t> &input, bool applyDropout = false);

   /*! Compute weight, bias and activation gradients. Uses the precomputed
    * first partial derivatives of the activation function computed during
    * forward propagation and modifies them. Must only be called directly
    * after the corresponding call to Forward(...). */
   void Backward(std::vector<Matrix_t> &gradients_backward, const std::vector<Matrix_t> &activations_backward,
                 std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2);

   /*! Prints the layer info. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Reads the information and the weights about the layer from an XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Set the dropout probability */
   virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   const std::vector<Matrix_t> &GetDerivatives() const { return fDerivatives; }
   std::vector<Matrix_t> &GetDerivatives() { return fDerivatives; }

   Matrix_t &GetDerivativesAt(size_t i) { return fDerivatives[i]; }
   const Matrix_t &GetDerivativesAt(size_t i) const { return fDerivatives[i]; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};

//
//
//  The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fDerivatives(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   fDerivatives.emplace_back(batchSize, width);
}
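// Reading the VGeneralLayer base call above (a sketch of the argument mapping,
// assumed from GeneralLayer.h rather than stated here): the input is treated
// as a 1 x 1 x inputWidth volume and the output as a 1 x 1 x width one; the
// layer owns a single width x inputWidth weight matrix, a single width x 1
// bias matrix, and one batchSize x width output matrix.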

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer->GetDropoutProbability()),
     fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives.emplace_back(layer->GetBatchSize(), layer->GetWidth());
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fDerivatives(), fDropoutProbability(layer.fDropoutProbability), fF(layer.fF),
     fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives.emplace_back(layer.fBatchSize, layer.fWidth);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // Nothing to do here.
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(std::vector<Matrix_t> &input, bool applyDropout) -> void
{
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::Dropout(input[0], this->GetDropoutProbability());
   }
   Architecture_t::MultiplyTranspose(this->GetOutputAt(0), input[0], this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutputAt(0), this->GetBiasesAt(0));
   evaluateDerivative<Architecture_t>(this->GetDerivativesAt(0), this->GetActivationFunction(), this->GetOutputAt(0));
   evaluate<Architecture_t>(this->GetOutputAt(0), this->GetActivationFunction());
}
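// In matrix form, the calls above compute, for the single batch matrix X
// (a sketch; the MultiplyTranspose/AddRowWise semantics are assumed from
// their names and use here):
//    U = X * W^T                  (MultiplyTranspose)
//    U(i,j) += theta(j)           (AddRowWise: bias added to every row)
//    fDerivatives[0] = f'(U)      (evaluateDerivative, before U is overwritten)
//    output = f(U)                (evaluate, in place)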

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(std::vector<Matrix_t> &gradients_backward,
                                           const std::vector<Matrix_t> &activations_backward,
                                           std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> & /*inp2*/) -> void
{
   if (gradients_backward.size() == 0) {
      Matrix_t dummy(0, 0);
      Architecture_t::Backward(dummy, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   } else {
      Architecture_t::Backward(gradients_backward[0], this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                               this->GetDerivativesAt(0), this->GetActivationGradientsAt(0), this->GetWeightsAt(0),
                               activations_backward[0]);
   }

   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
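// What Architecture_t::Backward is expected to produce (a sketch based on the
// standard dense-layer backpropagation rules, with dY = the incoming activation
// gradients multiplied element-wise by f'(U), and X = activations_backward[0];
// this is an assumption, not stated in this file):
//    weight gradients  dW     = dY^T * X
//    bias gradients    dtheta = column-wise sums of dY
//    input gradients   dX     = dY * W   (written into gradients_backward[0];
//                                         skipped for the first layer, where
//                                         gradients_backward is empty)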

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();          // input size
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) "; // layer width
   if (this->GetOutput().size() > 0) {
      std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().size() << " ," << std::setw(6)
                << this->GetOutput()[0].GetNrows() << " ," << std::setw(6) << this->GetOutput()[0].GetNcols()
                << " ) ";
   }
   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)];
   if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}
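// Illustrative output of Print() for the 8-unit ReLU layer sketched earlier
// (spacing follows the setw() calls above; tabs shown as plain gaps):
//    DENSE Layer:   ( Input =    16 , Width =     8 )  Output = (  1 ,    32 ,     8 )   Activation Function = Relu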

//______________________________________________________________________________

template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // Write the layer width, the activation function, and the weight and bias matrices.

   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction", TString::Itoa(activationFunction, 10));

   // Write the weight and bias matrices.
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
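// Shape of the XML node written above (illustrative only; how WriteMatrixToXML
// encodes the matrix payload and its attributes is an assumption, as it is not
// shown in this file):
//    <DenseLayer Width="8" ActivationFunction="1">
//       <Weights rows="8" cols="16"> ... </Weights>
//       <Biases rows="8" cols="1"> ... </Biases>
//    </DenseLayer>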

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read the layer weights and biases from the XML node.
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif