42template <
typename Architecture_t, 
typename Layer_t = VGeneralLayer<Architecture_t>,
 
   43          typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
 
   46   using Matrix_t = 
typename Architecture_t::Matrix_t;
 
   47   using Scalar_t = 
typename Architecture_t::Scalar_t;
 
   65   void UpdateWeights(
size_t layerIndex, std::vector<Matrix_t> &weights, 
const std::vector<Matrix_t> &weightGradients);
 
   68   void UpdateBiases(
size_t layerIndex, std::vector<Matrix_t> &biases, 
const std::vector<Matrix_t> &biasGradients);
 
  100template <
typename Architecture_t, 
typename Layer_t, 
typename DeepNet_t>
 
  103   : 
VOptimizer<Architecture_t, Layer_t, DeepNet_t>(learningRate, deepNet), fBeta1(beta1), fBeta2(beta2),
 
  106   std::vector<Layer_t *> &layers = deepNet.
GetLayers();
 
  107   const size_t layersNSlices = layers.size();
 
  113   for (
size_t i = 0; i < layersNSlices; i++) {
 
  114      const size_t weightsNSlices = (layers[i]->GetWeights()).size();
 
  116      for (
size_t j = 0; j < weightsNSlices; j++) {
 
  117         Matrix_t &currentWeights = layers[i]->GetWeightsAt(j);
 
  118         const size_t weightsNRows = currentWeights.GetNrows();
 
  119         const size_t weightsNCols = currentWeights.GetNcols();
 
  127      const size_t biasesNSlices = (layers[i]->GetBiases()).size();
 
  129      for (
size_t j = 0; j < biasesNSlices; j++) {
 
  130         Matrix_t &currentBiases = layers[i]->GetBiasesAt(j);
 
  131         const size_t biasesNRows = currentBiases.GetNrows();
 
  132         const size_t biasesNCols = currentBiases.GetNcols();
 
  143template <
typename Architecture_t, 
typename Layer_t, 
typename DeepNet_t>
 
  145                                                              const std::vector<Matrix_t> &weightGradients) -> 
void 
  147   std::vector<Matrix_t> &currentLayerFirstMomentWeights = this->GetFirstMomentWeightsAt(layerIndex);
 
  148   std::vector<Matrix_t> &currentLayerSecondMomentWeights = this->GetSecondMomentWeightsAt(layerIndex);
 
  151   Scalar_t alpha = (this->GetLearningRate()) * (
sqrt(1 - 
pow(this->GetBeta2(), this->GetGlobalStep()))) /
 
  152                    (1 - 
pow(this->GetBeta1(), this->GetGlobalStep()));
 
  155   for (
size_t i = 0; i < weights.size(); i++) {
 
  157      Architecture_t::AdamUpdateFirstMom(currentLayerFirstMomentWeights[i], weightGradients[i], this->GetBeta1() );
 
  159      Architecture_t::AdamUpdateSecondMom(currentLayerSecondMomentWeights[i], weightGradients[i], this->GetBeta2() );
 
  161      Architecture_t::AdamUpdate(weights[i], currentLayerFirstMomentWeights[i], currentLayerSecondMomentWeights[i],
 
  162                                 alpha, this->GetEpsilon() );
 
  167template <
typename Architecture_t, 
typename Layer_t, 
typename DeepNet_t>
 
  169                                                             const std::vector<Matrix_t> &biasGradients) -> 
void 
  171   std::vector<Matrix_t> &currentLayerFirstMomentBiases = this->GetFirstMomentBiasesAt(layerIndex);
 
  172   std::vector<Matrix_t> &currentLayerSecondMomentBiases = this->GetSecondMomentBiasesAt(layerIndex);
 
  175   Scalar_t alpha = (this->GetLearningRate()) * (
sqrt(1 - 
pow(this->GetBeta2(), this->GetGlobalStep()))) /
 
  176                    (1 - 
pow(this->GetBeta1(), this->GetGlobalStep()));
 
  179   for (
size_t i = 0; i < biases.size(); i++) {
 
  181      Architecture_t::AdamUpdateFirstMom(currentLayerFirstMomentBiases[i], biasGradients[i], this->GetBeta1() );
 
  183      Architecture_t::AdamUpdateSecondMom(currentLayerSecondMomentBiases[i], biasGradients[i], this->GetBeta2() );
 
  185      Architecture_t::AdamUpdate(biases[i], currentLayerFirstMomentBiases[i], currentLayerSecondMomentBiases[i],
 
  186                                 alpha, this->GetEpsilon() );
 
double pow(double, double)
std::vector< std::vector< Matrix_t > > fSecondMomentWeights
The decaying average of the second moment of the past weight gradients associated with the deep net.
std::vector< Matrix_t > & GetSecondMomentBiasesAt(size_t i)
Scalar_t GetEpsilon() const
typename Architecture_t::Matrix_t Matrix_t
std::vector< Matrix_t > & GetFirstMomentBiasesAt(size_t i)
Scalar_t fBeta2
The Beta2 constant used by the optimizer.
std::vector< std::vector< Matrix_t > > fSecondMomentBiases
The decaying average of the second moment of the past bias gradients associated with the deep net.
std::vector< std::vector< Matrix_t > > & GetSecondMomentBiases()
~TAdam()=default
Destructor.
std::vector< Matrix_t > & GetFirstMomentWeightsAt(size_t i)
std::vector< std::vector< Matrix_t > > & GetSecondMomentWeights()
TAdam(DeepNet_t &deepNet, Scalar_t learningRate=0.001, Scalar_t beta1=0.9, Scalar_t beta2=0.999, Scalar_t epsilon=1e-8)
Constructor.
Scalar_t GetBeta2() const
std::vector< std::vector< Matrix_t > > fFirstMomentBiases
The decaying average of the first moment of the past bias gradients associated with the deep net.
Scalar_t fEpsilon
The smoothing term used to avoid division by zero.
void UpdateWeights(size_t layerIndex, std::vector< Matrix_t > &weights, const std::vector< Matrix_t > &weightGradients)
Update the weights, given the current weight gradients.
std::vector< std::vector< Matrix_t > > & GetFirstMomentWeights()
std::vector< Matrix_t > & GetSecondMomentWeightsAt(size_t i)
std::vector< std::vector< Matrix_t > > fFirstMomentWeights
The decaying average of the first moment of the past weight gradients associated with the deep net.
Scalar_t fBeta1
The Beta1 constant used by the optimizer.
void UpdateBiases(size_t layerIndex, std::vector< Matrix_t > &biases, const std::vector< Matrix_t > &biasGradients)
Update the biases, given the current bias gradients.
std::vector< std::vector< Matrix_t > > & GetFirstMomentBiases()
typename Architecture_t::Scalar_t Scalar_t
Scalar_t GetBeta1() const
Getters.
std::vector< Layer_t * > & GetLayers()
create variable transformations