#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:
   Tensor_t fInputActivation;    ///< output of GEMM and input to the activation function
   Tensor_t fDerivatives;        ///< gradient of the activation function
   Scalar_t fDropoutProbability; ///< probability that an input is active
   EActivationFunction fF;       ///< activation function of the layer
   ERegularization fReg;         ///< the regularization method
   Scalar_t fWeightDecay;        ///< the weight decay
   typename Architecture_t::ActivationDescriptor_t fActivationDesc; ///< descriptor for the activation function

   // ... member function declarations omitted in this listing; the implementations follow below ...
};
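// For an input batch X of shape (batchSize, inputWidth), weights W of shape
// (width, inputWidth) and a bias row vector b of length width, the layer
// computes Y = f(X * W^T + b), with the activation f applied element-wise.
// A minimal self-contained sketch of that computation for a single event
// (illustration only: plain C++ instead of an Architecture_t backend, and
// denseForward is a hypothetical name, not part of this header):
//
//    #include <algorithm>
//    #include <cstddef>
//    #include <vector>
//
//    std::vector<float> denseForward(const std::vector<float> &x,              // inputWidth
//                                    const std::vector<std::vector<float>> &W, // width x inputWidth
//                                    const std::vector<float> &b)              // width
//    {
//       std::vector<float> y(W.size());
//       for (std::size_t i = 0; i < W.size(); ++i) {
//          float z = b[i];
//          for (std::size_t j = 0; j < x.size(); ++j) z += W[i][j] * x[j];
//          y[i] = std::max(z, 0.f); // ReLU as an example activation
//       }
//       return y;
//    }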
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   // allocate the pre-activation and derivative tensors with the output shape
   fInputActivation = Tensor_t(this->GetOutput().GetShape());
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}
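// Usage sketch (illustration only, assuming the CPU backend TCpu<float> from
// TMVA/DNN/Architectures/Cpu.h; the argument values are arbitrary). Note that
// fDropoutProbability is the probability of *keeping* a unit, so 1.0 disables dropout:
//
//    using namespace TMVA::DNN;
//    TDenseLayer<TCpu<float>> layer(/*batchSize=*/32, /*inputWidth=*/16, /*width=*/8,
//                                   EInitialization::kGauss, /*dropoutProbability=*/1.0,
//                                   EActivationFunction::kRelu, ERegularization::kL2,
//                                   /*weightDecay=*/1.e-4);
//    layer.Initialize(); // fills weights and biases according to the chosen EInitialization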
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer),
     fInputActivation(layer->GetInputActivation().GetShape()),
     fDropoutProbability(layer->GetDropoutProbability()),
     fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer),
     fInputActivation(layer.GetInputActivation().GetShape()),
     fDropoutProbability(layer.fDropoutProbability),
     fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // release the activation function descriptor
   Architecture_t::ReleaseDescriptor(fActivationDesc);
}
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::DropoutForward(input, static_cast<TDescriptors *>(nullptr),
                                     static_cast<TWorkspace *>(nullptr),
                                     this->GetDropoutProbability());
   }
   Architecture_t::MultiplyTranspose(this->GetOutput(), input, this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));

   // keep a copy of the pre-activation values for the backward pass ...
   Architecture_t::Copy(this->GetInputActivation(), this->GetOutput());
   // ... and apply the activation function in place on the output
   Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
}
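// Shape bookkeeping for the forward pass (illustrative): with batchSize B = 2,
// inputWidth n = 3 and width m = 2, MultiplyTranspose computes X * W^T as a
// (B x m) matrix and AddRowWise adds the bias to every row, e.g.
//
//    X = [[1, 2, 3],    W = [[0.1, 0.2, 0.3],    b = [0.5, -0.5]
//         [4, 5, 6]]         [0.4, 0.5, 0.6]]
//
//    X * W^T + b = [[1.9, 2.7],
//                   [3.7, 7.2]]
//
// Dropout, when enabled, is applied to the input before the GEMM; a dropout
// probability of 1.0 means every unit is kept.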
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) -> void
{
   if (this->GetDropoutProbability() != 1.0) {
      Architecture_t::DropoutBackward(this->GetActivationGradients(),
                                      static_cast<TDescriptors *>(nullptr),
                                      static_cast<TWorkspace *>(nullptr));
   }
   // gradient of the activation function, using the pre-activation values saved in Forward
   Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(),
                                              this->GetActivationGradients(), this->GetInputActivation(),
                                              this->GetActivationFunction(), fActivationDesc);
   // weight, bias and input gradients
   Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                            fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
                            activations_backward);
   // add the regularization contribution to the weight gradients
   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
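// Gradient bookkeeping (illustrative): writing Z = X * W^T + b for the saved
// pre-activation values and dY for the incoming activation gradients,
//
//    dZ = f'(Z) * dY (element-wise)   -> fDerivatives
//    dW = dZ^T * X                    -> weight gradients, shape (width, inputWidth)
//    db = column-wise sum of dZ       -> bias gradients, length width
//    dX = dZ * W                      -> gradients_backward for the previous layer
//
// plus the derivative of the regularization term with respect to W (e.g.,
// proportional to weightDecay * W for L2 regularization).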
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) ";

   std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize()
             << " ," << std::setw(6) << this->GetOutput().GetShape()[0]
             << " ," << std::setw(6) << this->GetOutput().GetShape()[1] << " ) ";

   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh",
                                               "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)];
   if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}
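// For a layer with inputWidth = 16, width = 8 and ReLU activation, the printed
// line would look roughly like this (illustrative; the output shape values
// depend on the Architecture_t tensor layout):
//
//    DENSE Layer:   ( Input =    16 , Width =     8 )  Output = ( 32 , ... , ... )   Activation Function = Relu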
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // create the "DenseLayer" node and record width and activation function as attributes
   auto layerxml = gTools().xmlengine().NewChild(parent, nullptr, "DenseLayer");
   gTools().xmlengine().NewAttr(layerxml, nullptr, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, nullptr, "ActivationFunction", TString::Itoa(activationFunction, 10));

   // write the weight and bias matrices
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
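// The resulting XML fragment has roughly this shape (illustrative; the exact
// matrix encoding is defined by WriteMatrixToXML in the general layer class):
//
//    <DenseLayer Width="8" ActivationFunction="1">
//       <Weights rows="8" cols="16"> ... </Weights>
//       <Biases rows="8" cols="1"> ... </Biases>
//    </DenseLayer>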
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // read back the weight and bias matrices written by AddWeightsXMLTo
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

#endif // TMVA_DNN_DENSELAYER