ROOT   Reference Guide
Go to the documentation of this file.
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Ravi Kiran S

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : TAdagrad                                                              *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Adagrad Optimizer Class                                                   *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Ravi Kiran S      <sravikiran0606@gmail.com>  - CERN, Switzerland         *
 *                                                                                *
 * Copyright (c) 2005-2018:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 **********************************************************************************/
26
29
30#include "TMatrix.h"
31#include "TMVA/DNN/Optimizer.h"
32#include "TMVA/DNN/Functions.h"
33
34namespace TMVA {
35namespace DNN {
36
39 *
41 */
42template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>,
43 typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
45public:
46 using Matrix_t = typename Architecture_t::Matrix_t;
47 using Scalar_t = typename Architecture_t::Scalar_t;
48
49protected:
50 Scalar_t fEpsilon; ///< The Smoothing term used to avoid division by zero.
51
52 std::vector<std::vector<Matrix_t>>
53 fPastSquaredWeightGradients; ///< The sum of the square of the past weight gradients associated with the deep net.
54 std::vector<std::vector<Matrix_t>>
55 fPastSquaredBiasGradients; ///< The sum of the square of the past bias gradients associated with the deep net.
56 std::vector<std::vector<Matrix_t>>
57 fWorkWeightTensor; ///< working tensor used to keep a temporary copy of weights or weight gradients
58 std::vector<std::vector<Matrix_t>>
59 fWorkBiasTensor; ///< working tensor used to keep a temporary copy of bias or bias gradients
60
61 /*! Update the weights, given the current weight gradients. */
62 void UpdateWeights(size_t layerIndex, std::vector<Matrix_t> &weights, const std::vector<Matrix_t> &weightGradients);
63
64 /*! Update the biases, given the current bias gradients. */
65 void UpdateBiases(size_t layerIndex, std::vector<Matrix_t> &biases, const std::vector<Matrix_t> &biasGradients);
66
67public:
68 /*! Constructor. */
70
71 /*! Destructor. */
73
74 /*! Getters */
75 Scalar_t GetEpsilon() const { return fEpsilon; }
76
79
82};
83
84//
85//
87//_________________________________________________________________________________________________
88template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
90 : VOptimizer<Architecture_t, Layer_t, DeepNet_t>(learningRate, deepNet), fEpsilon(epsilon)
91{
92 std::vector<Layer_t *> &layers = deepNet.GetLayers();
93 const size_t layersNSlices = layers.size();
96 fWorkWeightTensor.resize(layersNSlices);
97 fWorkBiasTensor.resize(layersNSlices);
98
99 for (size_t i = 0; i < layersNSlices; i++) {
100 const size_t weightsNSlices = (layers[i]->GetWeights()).size();
101
102 // weight and weight gradients tensors should have same
104
105 for (size_t j = 0; j < weightsNSlices; j++) {
107 }
108
109 const size_t biasesNSlices = (layers[i]->GetBiases()).size();
110
112
113 for (size_t j = 0; j < biasesNSlices; j++) {
115 }
116
117 Architecture_t::CreateWeightTensors(fWorkWeightTensor[i], layers[i]->GetWeights());
118 Architecture_t::CreateWeightTensors(fWorkBiasTensor[i], layers[i]->GetBiases());
119
120 }
121}
122
123//_________________________________________________________________________________________________
124template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
126 const std::vector<Matrix_t> &weightGradients) -> void
127{
129
130
131 const size_t weightsNSlices = weights.size();
133
134 for (size_t i = 0; i < weightsNSlices; i++) {
135
137 // Vt = Vt-1 + currentSquaredWeightGradients
141
142 // updating the weights.
143 // theta = theta - learningRate * currentWeightGradients / (sqrt(Vt + epsilon))
144
145 auto &currentWeightUpdates = fWorkWeightTensor[layerIndex][i]; // reuse the work tensor for the weight updates now
152 }
153}
154
155//_________________________________________________________________________________________________
156template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
158 const std::vector<Matrix_t> &biasGradients) -> void
159{
161
162 const size_t biasesNSlices = biases.size();
164 for (size_t i = 0; i < biasesNSlices; i++) {
165
166 // Vt = Vt-1 + currentSquaredBiasGradients
171
172 // updating the biases.
173 // theta = theta - learningRate * currentBiasGradients / (sqrt(Vt + epsilon))
174
182 }
183}
184
185} // namespace DNN
186} // namespace TMVA
187
188#endif
#define e(i)
Definition: RSha256.hxx:103
void UpdateWeights(size_t layerIndex, std::vector< Matrix_t > &weights, const std::vector< Matrix_t > &weightGradients)
Update the weights, given the current weight gradients.
void UpdateBiases(size_t layerIndex, std::vector< Matrix_t > &biases, const std::vector< Matrix_t > &biasGradients)
Update the biases, given the current bias gradients.
std::vector< std::vector< Matrix_t > > & GetPastSquaredBiasGradients()
std::vector< std::vector< Matrix_t > > fPastSquaredBiasGradients
The sum of the square of the past bias gradients associated with the deep net.
Scalar_t GetEpsilon() const
Getters.
Constructor.
std::vector< std::vector< Matrix_t > > fPastSquaredWeightGradients
The sum of the square of the past weight gradients associated with the deep net.
typename Architecture_t::Matrix_t Matrix_t
typename Architecture_t::Scalar_t Scalar_t
Scalar_t fEpsilon
The Smoothing term used to avoid division by zero.
std::vector< std::vector< Matrix_t > > & GetPastSquaredWeightGradients()
std::vector< Matrix_t > & GetPastSquaredBiasGradientsAt(size_t i)
std::vector< std::vector< Matrix_t > > fWorkWeightTensor
working tensor used to keep a temporary copy of weights or weight gradients
std::vector< std::vector< Matrix_t > > fWorkBiasTensor
working tensor used to keep a temporary copy of bias or bias gradients