Logo ROOT  
Reference Guide
TMVA::DNN::TBatchNormLayer< Architecture_t > Class Template Reference

template<typename Architecture_t>
class TMVA::DNN::TBatchNormLayer< Architecture_t >

Layer implementing Batch Normalization.

The inputs from each batch are normalized during training to have zero mean and unit variance, and they are then scaled and shifted by two parameters, different for each input variable:

  • a scale factor gamma
  • an offset beta

In addition, a running batch mean and variance are computed and stored in the class. During inference the inputs are not normalized using the batch mean but using the previously computed running mean and variance. If momentum is in [0,1), the running mean and variance are exponential averages using the momentum value: running_mean = momentum * running_mean + (1-momentum) * batch_mean. If instead momentum<0, the cumulative average is computed: running_mean = (nb/(nb+1)) * running_mean + (1/(nb+1)) * batch_mean.

See more at [https://arxiv.org/pdf/1502.03167v3.pdf]

Definition at line 63 of file BatchNormLayer.h.

Public Types

using BNormDescriptors_t = typename Architecture_t::BNormDescriptors_t
 
using HelperDescriptor_t = typename Architecture_t::TensorDescriptor_t
 
using Matrix_t = typename Architecture_t::Matrix_t
 
using Scalar_t = typename Architecture_t::Scalar_t
 
using Tensor_t = typename Architecture_t::Tensor_t
 

Public Member Functions

 TBatchNormLayer (const TBatchNormLayer &)
 Copy Constructor. More...
 
 TBatchNormLayer (size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth, const std::vector< size_t > &shape, int axis=-1, Scalar_t momentum=-1., Scalar_t epsilon=0.0001)
 Constructor. More...
 
 TBatchNormLayer (TBatchNormLayer< Architecture_t > *layer)
 Copy the batch normalization layer provided as a pointer. More...
 
 ~TBatchNormLayer ()
 Destructor. More...
 
virtual void AddWeightsXMLTo (void *parent)
 Writes the information and the weights about the layer in an XML node. More...
 
void Backward (Tensor_t &gradients_backward, const Tensor_t &activations_backward)
 Compute weight, bias and activation gradients. More...
 
void Forward (Tensor_t &input, bool inTraining=true)
 Compute activation of the layer for the given input. More...
 
Matrix_t & GetBatchMean ()
 
const Matrix_t & GetBatchMean () const
 
Scalar_t GetEpsilon () const
 
std::vector< Matrix_t > GetExtraLayerParameters () const
 
Matrix_t & GetIVariance ()
 
const Matrix_t & GetIVariance () const
 
Scalar_t GetMomentum () const
 
Matrix_t & GetMuVector ()
 
const Matrix_t & GetMuVector () const
 
Scalar_t GetNormAxis () const
 
int & GetNTrainedBatches ()
 
const int & GetNTrainedBatches () const
 
Matrix_t & GetReshapedData ()
 
const Matrix_t & GetReshapedData () const
 
Matrix_t & GetVariance ()
 
const Matrix_t & GetVariance () const
 
Matrix_t & GetVarVector ()
 
const Matrix_t & GetVarVector () const
 
virtual void Initialize ()
 Initialize the weights and biases according to the given initialization method. More...
 
void Print () const
 Printing the layer info. More...
 
virtual void ReadWeightsFromXML (void *parent)
 Read the information and the weights about the layer from XML node. More...
 
void ResetTraining ()
 Reset some training flags after a loop on all batches. More...
 
void SetExtraLayerParameters (const std::vector< Matrix_t > &params)
 
- Public Member Functions inherited from TMVA::DNN::VGeneralLayer< Architecture_t >
 VGeneralLayer (const VGeneralLayer &)
 Copy Constructor. More...
 
 VGeneralLayer (size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init)
 Constructor. More...
 
 VGeneralLayer (size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, std::vector< size_t > WeightsNRows, std::vector< size_t > WeightsNCols, size_t BiasesNSlices, std::vector< size_t > BiasesNRows, std::vector< size_t > BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init)
 General Constructor with different weights dimension. More...
 
 VGeneralLayer (VGeneralLayer< Architecture_t > *layer)
 Copy the layer provided as a pointer. More...
 
virtual ~VGeneralLayer ()
 Virtual Destructor. More...
 
virtual void AddWeightsXMLTo (void *parent)=0
 Writes the information and the weights about the layer in an XML node. More...
 
virtual void Backward (Tensor_t &gradients_backward, const Tensor_t &activations_backward)=0
 Backpropagates the error. More...
 
void CopyBiases (const std::vector< Matrix_t > &otherBiases)
 Copies the biases provided as an input. More...
 
template<typename Arch >
void CopyParameters (const VGeneralLayer< Arch > &layer)
 Copy all trainable weights and biases from another equivalent layer but with a different architecture. The function can also copy extra parameters in addition to weights and biases if they are returned by the function GetExtraLayerParameters. More...
 
void CopyWeights (const std::vector< Matrix_t > &otherWeights)
 Copies the weights provided as an input. More...
 
virtual void Forward (Tensor_t &input, bool applyDropout=false)=0
 Computes activation of the layer for the given input. More...
 
Tensor_t & GetActivationGradients ()
 
const Tensor_t & GetActivationGradients () const
 
Matrix_t GetActivationGradientsAt (size_t i)
 
const Matrix_t & GetActivationGradientsAt (size_t i) const
 
size_t GetBatchSize () const
 Getters. More...
 
std::vector< Matrix_t > & GetBiases ()
 
const std::vector< Matrix_t > & GetBiases () const
 
Matrix_t & GetBiasesAt (size_t i)
 
const Matrix_t & GetBiasesAt (size_t i) const
 
std::vector< Matrix_t > & GetBiasGradients ()
 
const std::vector< Matrix_t > & GetBiasGradients () const
 
Matrix_t & GetBiasGradientsAt (size_t i)
 
const Matrix_t & GetBiasGradientsAt (size_t i) const
 
size_t GetDepth () const
 
virtual std::vector< Matrix_t > GetExtraLayerParameters () const
 
size_t GetHeight () const
 
EInitialization GetInitialization () const
 
size_t GetInputDepth () const
 
size_t GetInputHeight () const
 
size_t GetInputWidth () const
 
Tensor_t & GetOutput ()
 
const Tensor_t & GetOutput () const
 
Matrix_t GetOutputAt (size_t i)
 
const Matrix_t & GetOutputAt (size_t i) const
 
std::vector< Matrix_t > & GetWeightGradients ()
 
const std::vector< Matrix_t > & GetWeightGradients () const
 
Matrix_t & GetWeightGradientsAt (size_t i)
 
const Matrix_t & GetWeightGradientsAt (size_t i) const
 
std::vector< Matrix_t > & GetWeights ()
 
const std::vector< Matrix_t > & GetWeights () const
 
Matrix_t & GetWeightsAt (size_t i)
 
const Matrix_t & GetWeightsAt (size_t i) const
 
size_t GetWidth () const
 
virtual void Initialize ()
 Initialize the weights and biases according to the given initialization method. More...
 
bool IsTraining () const
 
virtual void Print () const =0
 Prints the info about the layer. More...
 
void ReadMatrixXML (void *node, const char *name, Matrix_t &matrix)
 
virtual void ReadWeightsFromXML (void *parent)=0
 Read the information and the weights about the layer from XML node. More...
 
virtual void ResetTraining ()
 Reset some training flags after a loop on all batches. More...
 
void SetBatchSize (size_t batchSize)
 Setters. More...
 
void SetDepth (size_t depth)
 
virtual void SetDropoutProbability (Scalar_t)
 Set Dropout probability. More...
 
virtual void SetExtraLayerParameters (const std::vector< Matrix_t > &)
 
void SetHeight (size_t height)
 
void SetInputDepth (size_t inputDepth)
 
void SetInputHeight (size_t inputHeight)
 
void SetInputWidth (size_t inputWidth)
 
void SetIsTraining (bool isTraining)
 
void SetWidth (size_t width)
 
void Update (const Scalar_t learningRate)
 Updates the weights and biases, given the learning rate. More...
 
void UpdateBiases (const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
 Updates the biases, given the gradients and the learning rate. More...
 
void UpdateBiasGradients (const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
 Updates the bias gradients, given some other bias gradients and learning rate. More...
 
void UpdateWeightGradients (const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
 Updates the weight gradients, given some other weight gradients and learning rate. More...
 
void UpdateWeights (const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
 Updates the weights, given the gradients and the learning rate. More...
 
void WriteMatrixToXML (void *node, const char *name, const Matrix_t &matrix)
 
void WriteTensorToXML (void *node, const char *name, const std::vector< Matrix_t > &tensor)
 helper functions for XML More...
 

Static Protected Member Functions

static size_t CalculateNormDim (int axis, size_t c, size_t h, size_t w)
 

Private Attributes

Tensor_t fDerivatives
 First derivatives of the activations of this layer. More...
 
TDescriptors * fDescriptors = nullptr
 
Scalar_t fEpsilon
 
Matrix_t fIVar
 
Scalar_t fMomentum
 The momentum used to update the running mean and variance. More...
 
Matrix_t fMu
 
Matrix_t fMu_Training
 
int fNormAxis
 Normalization axis. For each element of this axis we will compute mean and stddev. More...
 
Tensor_t fReshapedData
 
int fTrainedBatches = 0
 
Matrix_t fVar
 
Matrix_t fVar_Training
 

Additional Inherited Members

- Protected Attributes inherited from TMVA::DNN::VGeneralLayer< Architecture_t >
Tensor_t fActivationGradients
 Gradients w.r.t. the activations of this layer. More...
 
size_t fBatchSize
 Batch size used for training and evaluation. More...
 
std::vector< Matrix_t > fBiases
 The biases associated to the layer. More...
 
std::vector< Matrix_t > fBiasGradients
 Gradients w.r.t. the bias values of the layer. More...
 
size_t fDepth
 The depth of the layer. More...
 
size_t fHeight
 The height of the layer. More...
 
EInitialization fInit
 The initialization method. More...
 
size_t fInputDepth
 The depth of the previous layer or input. More...
 
size_t fInputHeight
 The height of the previous layer or input. More...
 
size_t fInputWidth
 The width of the previous layer or input. More...
 
bool fIsTraining
 Flag indicating the mode. More...
 
Tensor_t fOutput
 Activations of this layer. More...
 
std::vector< Matrix_t > fWeightGradients
 Gradients w.r.t. the weights of the layer. More...
 
std::vector< Matrix_t > fWeights
 The weights associated to the layer. More...
 
size_t fWidth
 The width of this layer. More...
 

#include <TMVA/DNN/BatchNormLayer.h>

Inheritance diagram for TMVA::DNN::TBatchNormLayer< Architecture_t >:
[legend]

Member Typedef Documentation

◆ BNormDescriptors_t

template<typename Architecture_t >
using TMVA::DNN::TBatchNormLayer< Architecture_t >::BNormDescriptors_t = typename Architecture_t::BNormDescriptors_t

Definition at line 71 of file BatchNormLayer.h.

◆ HelperDescriptor_t

template<typename Architecture_t >
using TMVA::DNN::TBatchNormLayer< Architecture_t >::HelperDescriptor_t = typename Architecture_t::TensorDescriptor_t

Definition at line 70 of file BatchNormLayer.h.

◆ Matrix_t

template<typename Architecture_t >
using TMVA::DNN::TBatchNormLayer< Architecture_t >::Matrix_t = typename Architecture_t::Matrix_t

Definition at line 67 of file BatchNormLayer.h.

◆ Scalar_t

template<typename Architecture_t >
using TMVA::DNN::TBatchNormLayer< Architecture_t >::Scalar_t = typename Architecture_t::Scalar_t

Definition at line 66 of file BatchNormLayer.h.

◆ Tensor_t

template<typename Architecture_t >
using TMVA::DNN::TBatchNormLayer< Architecture_t >::Tensor_t = typename Architecture_t::Tensor_t

Definition at line 68 of file BatchNormLayer.h.

Constructor & Destructor Documentation

◆ TBatchNormLayer() [1/3]

template<typename Architecture_t >
TMVA::DNN::TBatchNormLayer< Architecture_t >::TBatchNormLayer ( size_t  batchSize,
size_t  inputDepth,
size_t  inputHeight,
size_t  inputWidth,
const std::vector< size_t > &  shape,
int  axis = -1,
Scalar_t  momentum = -1.,
Scalar_t  epsilon = 0.0001 
)

Constructor.

Definition at line 218 of file BatchNormLayer.h.

◆ TBatchNormLayer() [2/3]

template<typename Architecture_t >
TMVA::DNN::TBatchNormLayer< Architecture_t >::TBatchNormLayer ( TBatchNormLayer< Architecture_t > *  layer)

Copy the batch normalization layer provided as a pointer.

Definition at line 241 of file BatchNormLayer.h.

◆ TBatchNormLayer() [3/3]

template<typename Architecture_t >
TMVA::DNN::TBatchNormLayer< Architecture_t >::TBatchNormLayer ( const TBatchNormLayer< Architecture_t > &  layer)

Copy Constructor.

Definition at line 250 of file BatchNormLayer.h.

◆ ~TBatchNormLayer()

template<typename Architecture_t >
TMVA::DNN::TBatchNormLayer< Architecture_t >::~TBatchNormLayer

Destructor.

Definition at line 258 of file BatchNormLayer.h.

Member Function Documentation

◆ AddWeightsXMLTo()

template<typename Architecture_t >
void TMVA::DNN::TBatchNormLayer< Architecture_t >::AddWeightsXMLTo ( void *  parent)
virtual

Writes the information and the weights about the layer in an XML node.

Implements TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 386 of file BatchNormLayer.h.

◆ Backward()

template<typename Architecture_t >
auto TMVA::DNN::TBatchNormLayer< Architecture_t >::Backward ( Tensor_t &  gradients_backward,
const Tensor_t &  activations_backward 
)
virtual

Compute weight, bias and activation gradients.

Uses the precomputed first partial derivatives of the activation function computed during forward propagation and modifies them. Must only be called directly after the corresponding call to Forward(...).

Implements TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 336 of file BatchNormLayer.h.

◆ CalculateNormDim()

template<typename Architecture_t >
static size_t TMVA::DNN::TBatchNormLayer< Architecture_t >::CalculateNormDim ( int  axis,
size_t  c,
size_t  h,
size_t  w 
)
inline static protected

Definition at line 198 of file BatchNormLayer.h.

◆ Forward()

template<typename Architecture_t >
auto TMVA::DNN::TBatchNormLayer< Architecture_t >::Forward ( Tensor_t &  input,
bool  inTraining = true 
)
virtual

Compute activation of the layer for the given input.

The input must be in 3D tensor form with the different matrices corresponding to different events in the batch. Computes activations as well as the first partial derivative of the activation function at those activations.

Implements TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 294 of file BatchNormLayer.h.

◆ GetBatchMean() [1/2]

template<typename Architecture_t >
Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetBatchMean ( )
inline

Definition at line 148 of file BatchNormLayer.h.

◆ GetBatchMean() [2/2]

template<typename Architecture_t >
const Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetBatchMean ( ) const
inline

Definition at line 147 of file BatchNormLayer.h.

◆ GetEpsilon()

template<typename Architecture_t >
Scalar_t TMVA::DNN::TBatchNormLayer< Architecture_t >::GetEpsilon ( ) const
inline

Definition at line 176 of file BatchNormLayer.h.

◆ GetExtraLayerParameters()

template<typename Architecture_t >
std::vector< Matrix_t > TMVA::DNN::TBatchNormLayer< Architecture_t >::GetExtraLayerParameters ( ) const
inline virtual

Reimplemented from TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 184 of file BatchNormLayer.h.

◆ GetIVariance() [1/2]

template<typename Architecture_t >
Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetIVariance ( )
inline

Definition at line 160 of file BatchNormLayer.h.

◆ GetIVariance() [2/2]

template<typename Architecture_t >
const Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetIVariance ( ) const
inline

Definition at line 159 of file BatchNormLayer.h.

◆ GetMomentum()

template<typename Architecture_t >
Scalar_t TMVA::DNN::TBatchNormLayer< Architecture_t >::GetMomentum ( ) const
inline

Definition at line 173 of file BatchNormLayer.h.

◆ GetMuVector() [1/2]

template<typename Architecture_t >
Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetMuVector ( )
inline

Definition at line 164 of file BatchNormLayer.h.

◆ GetMuVector() [2/2]

template<typename Architecture_t >
const Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetMuVector ( ) const
inline

Definition at line 163 of file BatchNormLayer.h.

◆ GetNormAxis()

template<typename Architecture_t >
Scalar_t TMVA::DNN::TBatchNormLayer< Architecture_t >::GetNormAxis ( ) const
inline

Definition at line 179 of file BatchNormLayer.h.

◆ GetNTrainedBatches() [1/2]

template<typename Architecture_t >
int & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetNTrainedBatches ( )
inline

Definition at line 144 of file BatchNormLayer.h.

◆ GetNTrainedBatches() [2/2]

template<typename Architecture_t >
const int & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetNTrainedBatches ( ) const
inline

Definition at line 143 of file BatchNormLayer.h.

◆ GetReshapedData() [1/2]

template<typename Architecture_t >
Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetReshapedData ( )
inline

Definition at line 182 of file BatchNormLayer.h.

◆ GetReshapedData() [2/2]

template<typename Architecture_t >
const Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetReshapedData ( ) const
inline

Definition at line 181 of file BatchNormLayer.h.

◆ GetVariance() [1/2]

template<typename Architecture_t >
Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetVariance ( )
inline

Definition at line 156 of file BatchNormLayer.h.

◆ GetVariance() [2/2]

template<typename Architecture_t >
const Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetVariance ( ) const
inline

Definition at line 155 of file BatchNormLayer.h.

◆ GetVarVector() [1/2]

template<typename Architecture_t >
Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetVarVector ( )
inline

Definition at line 168 of file BatchNormLayer.h.

◆ GetVarVector() [2/2]

template<typename Architecture_t >
const Matrix_t & TMVA::DNN::TBatchNormLayer< Architecture_t >::GetVarVector ( ) const
inline

Definition at line 167 of file BatchNormLayer.h.

◆ Initialize()

template<typename Architecture_t >
auto TMVA::DNN::TBatchNormLayer< Architecture_t >::Initialize
virtual

Initialize the weights and biases according to the given initialization method.

Reimplemented from TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 268 of file BatchNormLayer.h.

◆ Print()

template<typename Architecture_t >
void TMVA::DNN::TBatchNormLayer< Architecture_t >::Print
virtual

Printing the layer info.

Implements TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 368 of file BatchNormLayer.h.

◆ ReadWeightsFromXML()

template<typename Architecture_t >
void TMVA::DNN::TBatchNormLayer< Architecture_t >::ReadWeightsFromXML ( void *  parent)
virtual

Read the information and the weights about the layer from XML node.

Implements TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 411 of file BatchNormLayer.h.

◆ ResetTraining()

template<typename Architecture_t >
void TMVA::DNN::TBatchNormLayer< Architecture_t >::ResetTraining ( )
inline virtual

Reset some training flags after a loop on all batches.

Some layers (e.g. batch normalization) might need to implement the function in case some operations are needed after looping on all batches.

Reimplemented from TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 128 of file BatchNormLayer.h.

◆ SetExtraLayerParameters()

template<typename Architecture_t >
void TMVA::DNN::TBatchNormLayer< Architecture_t >::SetExtraLayerParameters ( const std::vector< Matrix_t > &  params)
inline virtual

Reimplemented from TMVA::DNN::VGeneralLayer< Architecture_t >.

Definition at line 191 of file BatchNormLayer.h.

Member Data Documentation

◆ fDerivatives

template<typename Architecture_t >
Tensor_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fDerivatives
private

First derivatives of the activations of this layer.

Definition at line 76 of file BatchNormLayer.h.

◆ fDescriptors

template<typename Architecture_t >
TDescriptors* TMVA::DNN::TBatchNormLayer< Architecture_t >::fDescriptors = nullptr
private

Definition at line 96 of file BatchNormLayer.h.

◆ fEpsilon

template<typename Architecture_t >
Scalar_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fEpsilon
private

Definition at line 81 of file BatchNormLayer.h.

◆ fIVar

template<typename Architecture_t >
Matrix_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fIVar
private

Definition at line 85 of file BatchNormLayer.h.

◆ fMomentum

template<typename Architecture_t >
Scalar_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fMomentum
private

The momentum used to update the running mean and variance.

Definition at line 80 of file BatchNormLayer.h.

◆ fMu

template<typename Architecture_t >
Matrix_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fMu
private

Definition at line 83 of file BatchNormLayer.h.

◆ fMu_Training

template<typename Architecture_t >
Matrix_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fMu_Training
private

Definition at line 87 of file BatchNormLayer.h.

◆ fNormAxis

template<typename Architecture_t >
int TMVA::DNN::TBatchNormLayer< Architecture_t >::fNormAxis
private

Normalization axis. For each element of this axis we will compute mean and stddev.

Definition at line 78 of file BatchNormLayer.h.

◆ fReshapedData

template<typename Architecture_t >
Tensor_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fReshapedData
private

Definition at line 91 of file BatchNormLayer.h.

◆ fTrainedBatches

template<typename Architecture_t >
int TMVA::DNN::TBatchNormLayer< Architecture_t >::fTrainedBatches = 0
private

Definition at line 94 of file BatchNormLayer.h.

◆ fVar

template<typename Architecture_t >
Matrix_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fVar
private

Definition at line 84 of file BatchNormLayer.h.

◆ fVar_Training

template<typename Architecture_t >
Matrix_t TMVA::DNN::TBatchNormLayer< Architecture_t >::fVar_Training
private

Definition at line 88 of file BatchNormLayer.h.


The documentation for this class was generated from the following file: