// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/////////////////////////////////////////////////////////////////////
// Contains function enums for activation and output functions, as //
// well as generic evaluation functions that delegate the call to  //
// the corresponding evaluation kernel.                             //
/////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_FUNCTIONS
#define TMVA_DNN_FUNCTIONS

namespace TMVA
{
namespace DNN
{
//______________________________________________________________________________
//
// Enum Definitions
//______________________________________________________________________________

/*! Enum that represents layer activation functions. */
enum class EActivationFunction
{
   kIdentity = 0,
   kRelu     = 1,
   kSigmoid  = 2,
   kTanh     = 3,
   kSymmRelu = 4,
   kSoftSign = 5,
   kGauss    = 6,
   kFastTanh = 7
};

/*! Enum that represents output functions. */
enum class EOutputFunction
{
   kIdentity = 'I',
   kSigmoid  = 'S',
   kSoftmax  = 'M'
};

/*! Enum that represents objective functions for the net, i.e. functions
 *  that take the output from the last layer in the net together with the
 *  truth values and return the objective function value that is to be
 *  minimized in the training process. */
enum class ELossFunction
{
   kCrossEntropy        = 'C',
   kMeanSquaredError    = 'R',
   kSoftmaxCrossEntropy = 'S'
};

/*! Enum representing the regularization type applied for a given layer. */
enum class ERegularization
{
   kNone = '0',
   kL1   = '1',
   kL2   = '2'
};

/*! Enum representing the initialization method used for this layer. */
enum class EInitialization {
   kGauss         = 'G',
   kUniform       = 'U',
   kIdentity      = 'I',
   kZero          = 'Z',
   kGlorotNormal  = 'X',
   kGlorotUniform = 'F',
};

/// Enum representing the optimizer used for training.
enum class EOptimizer {
   kSGD      = 0,
   kAdam     = 1,
   kAdagrad  = 2,
   kRMSProp  = 3,
   kAdadelta = 4,
};

//______________________________________________________________________________
//
// Activation Functions
//______________________________________________________________________________

/*! Apply the given activation function to each value in the given
 *  tensor A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Tensor_t &A,
                     EActivationFunction f)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : break;
   case EActivationFunction::kRelu     : Architecture_t::Relu(A);
      break;
   case EActivationFunction::kSigmoid  : Architecture_t::Sigmoid(A);
      break;
   case EActivationFunction::kTanh     : Architecture_t::Tanh(A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSign(A);
      break;
   case EActivationFunction::kGauss    : Architecture_t::Gauss(A);
      break;
   case EActivationFunction::kFastTanh : Architecture_t::FastTanh(A);
      break;
   }
}

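/*! Example: a minimal sketch of dispatching an activation through the
 *  generic evaluate function. Any concrete backend can be substituted for
 *  Architecture_t; the helper name applyActivation is purely illustrative.
 *
 *  \code
 *  template <typename Architecture_t>
 *  void applyActivation(typename Architecture_t::Tensor_t &A)
 *  {
 *     // Transforms every element of A in place, delegating to
 *     // Architecture_t::Relu.
 *     evaluate<Architecture_t>(A, EActivationFunction::kRelu);
 *  }
 *  \endcode
 */
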
/*! Compute the first partial derivative of the activation function for
 *  the values given in tensor A and write the results into B. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void evaluateDerivative(typename Architecture_t::Tensor_t & B,
                               EActivationFunction f,
                               const typename Architecture_t::Tensor_t & A)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);
      break;
   case EActivationFunction::kRelu     : Architecture_t::ReluDerivative(B, A);
      break;
   case EActivationFunction::kSigmoid  : Architecture_t::SigmoidDerivative(B, A);
      break;
   case EActivationFunction::kTanh     : Architecture_t::TanhDerivative(B, A);
      break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A);
      break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);
      break;
   case EActivationFunction::kGauss    : Architecture_t::GaussDerivative(B, A);
      break;
   case EActivationFunction::kFastTanh : Architecture_t::FastTanhDerivative(B, A);
      break;
   }
}

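/*! Example: a minimal sketch of pairing the forward and derivative
 *  evaluations. Since evaluate modifies A in place, the derivatives must
 *  be computed first if the pre-activation values are needed for them;
 *  the helper name forwardWithDerivative is purely illustrative.
 *
 *  \code
 *  template <typename Architecture_t>
 *  void forwardWithDerivative(typename Architecture_t::Tensor_t &A,
 *                             typename Architecture_t::Tensor_t &dA)
 *  {
 *     // Derivatives at the pre-activation values of A, written into dA.
 *     evaluateDerivative<Architecture_t>(dA, EActivationFunction::kSigmoid, A);
 *     // Now overwrite A with the activated values.
 *     evaluate<Architecture_t>(A, EActivationFunction::kSigmoid);
 *  }
 *  \endcode
 */
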
// Matrix version of the functions (for backward compatibility).
template<typename Architecture_t>
inline void evaluateMatrix( typename Architecture_t::Matrix_t &A,
                            EActivationFunction f)
{
   typename Architecture_t::Tensor_t t(A);
   evaluate<Architecture_t>(t, f);
}

template<typename Architecture_t>
inline void evaluateDerivativeMatrix( typename Architecture_t::Matrix_t &B,
                                      EActivationFunction f,
                                      const typename Architecture_t::Matrix_t & A)
{
   typename Architecture_t::Tensor_t t(B);
   evaluateDerivative<Architecture_t>(t, f, typename Architecture_t::Tensor_t(A));
}
//______________________________________________________________________________
//
// Output Functions
//______________________________________________________________________________

/*! Apply the given output function f to the values in matrix X and write
 *  the results into matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EOutputFunction f,
                     const typename Architecture_t::Matrix_t &X)
{
   switch(f)
   {
   case EOutputFunction::kIdentity : Architecture_t::Copy(A, X);
      break;
   case EOutputFunction::kSigmoid  : Architecture_t::Sigmoid(A, X);
      break;
   case EOutputFunction::kSoftmax  : Architecture_t::Softmax(A, X);
      break;
   }
}

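/*! Example: a minimal sketch of turning raw output-layer activations X
 *  into class probabilities; the helper name predict is purely
 *  illustrative.
 *
 *  \code
 *  template <typename Architecture_t>
 *  void predict(typename Architecture_t::Matrix_t &P,
 *               const typename Architecture_t::Matrix_t &X)
 *  {
 *     // Writes normalized class probabilities for each event into P.
 *     evaluate<Architecture_t>(P, EOutputFunction::kSoftmax, X);
 *  }
 *  \endcode
 */
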
//______________________________________________________________________________
//
// Loss Functions
//______________________________________________________________________________

/*! Compute the value of the objective function f for given activations
 *  of the output layer and the truth Y. */
template <typename Architecture_t>
inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y,
                     const typename Architecture_t::Matrix_t &output,
                     const typename Architecture_t::Matrix_t &weights)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy: return Architecture_t::CrossEntropy(Y, output, weights);
   case ELossFunction::kMeanSquaredError: return Architecture_t::MeanSquaredError(Y, output, weights);
   case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
   }
   return 0.0;
}

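/*! Example: a minimal sketch of evaluating the training loss for one batch,
 *  given the truth matrix Y, the network output, and per-event weights; the
 *  helper name batchLoss is purely illustrative.
 *
 *  \code
 *  template <typename Architecture_t>
 *  auto batchLoss(const typename Architecture_t::Matrix_t &Y,
 *                 const typename Architecture_t::Matrix_t &output,
 *                 const typename Architecture_t::Matrix_t &weights)
 *     -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
 *  {
 *     // Delegates to Architecture_t::MeanSquaredError.
 *     return evaluate<Architecture_t>(ELossFunction::kMeanSquaredError,
 *                                     Y, output, weights);
 *  }
 *  \endcode
 */
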
/*! Compute the gradient of the given loss function f for given activations
 *  output of the output layer and truth Y and write the results into dY. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f,
                              const typename Architecture_t::Matrix_t &Y,
                              const typename Architecture_t::Matrix_t &output,
                              const typename Architecture_t::Matrix_t &weights)
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy: Architecture_t::CrossEntropyGradients(dY, Y, output, weights); break;
   case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
   case ELossFunction::kSoftmaxCrossEntropy:
      Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
      break;
   }
}

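/*! Example: a minimal sketch of the first step of backpropagation: the
 *  gradient of the loss with respect to the output-layer activations is
 *  written into dY, from where it can be propagated backwards through the
 *  net; the helper name lossGradients is purely illustrative.
 *
 *  \code
 *  template <typename Architecture_t>
 *  void lossGradients(typename Architecture_t::Matrix_t &dY,
 *                     const typename Architecture_t::Matrix_t &Y,
 *                     const typename Architecture_t::Matrix_t &output,
 *                     const typename Architecture_t::Matrix_t &weights)
 *  {
 *     evaluateGradients<Architecture_t>(dY, ELossFunction::kMeanSquaredError,
 *                                       Y, output, weights);
 *  }
 *  \endcode
 */
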
//______________________________________________________________________________
//
// Regularization
//______________________________________________________________________________

/*! Evaluate the regularization functional for a given weight matrix. */
template<typename Architecture_t>
inline auto regularization(const typename Architecture_t::Matrix_t &A,
                           ERegularization R)
   -> decltype(Architecture_t::L1Regularization(A))
{
   switch(R)
   {
   case ERegularization::kNone :
      return 0.0;
   case ERegularization::kL1 :
      return Architecture_t::L1Regularization(A);
   case ERegularization::kL2 :
      return Architecture_t::L2Regularization(A);
   }
   return 0.0;
}

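/*! Example: a minimal sketch of adding an L2 penalty to a loss value,
 *  following the usual weight-decay convention
 *  loss + weightDecay * regularization; the helper name regularizedLoss is
 *  purely illustrative.
 *
 *  \code
 *  template <typename Architecture_t>
 *  auto regularizedLoss(typename Architecture_t::Scalar_t loss,
 *                       const typename Architecture_t::Matrix_t &W,
 *                       typename Architecture_t::Scalar_t weightDecay)
 *     -> typename Architecture_t::Scalar_t
 *  {
 *     return loss + weightDecay * regularization<Architecture_t>(W, ERegularization::kL2);
 *  }
 *  \endcode
 */
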
/*! Add the regularization gradient corresponding to weight matrix W, to
 *  the matrix A. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
                                       const typename Architecture_t::Matrix_t &W,
                                       typename Architecture_t::Scalar_t weightDecay,
                                       ERegularization R)
{
   switch(R)
   {
   case ERegularization::kNone :
      break;
   case ERegularization::kL1 :
      Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
      break;
   case ERegularization::kL2 :
      Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
      break;
   }
}

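/*! Example: a minimal sketch of folding the weight-decay term into the
 *  weight gradients before an optimizer update; the helper name
 *  addWeightDecay is purely illustrative.
 *
 *  \code
 *  template <typename Architecture_t>
 *  void addWeightDecay(typename Architecture_t::Matrix_t &gradW,
 *                      const typename Architecture_t::Matrix_t &W,
 *                      typename Architecture_t::Scalar_t weightDecay)
 *  {
 *     // gradW += gradient of the scaled L2 penalty on W.
 *     addRegularizationGradients<Architecture_t>(gradW, W, weightDecay,
 *                                                ERegularization::kL2);
 *  }
 *  \endcode
 */
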
//______________________________________________________________________________
//
// Initialization
//______________________________________________________________________________

template<typename Architecture_t>
inline void initialize(typename Architecture_t::Matrix_t & A,
                       EInitialization m)
{
   switch(m) {
   case EInitialization::kGauss    : Architecture_t::InitializeGauss(A);
      break;
   case EInitialization::kUniform  : Architecture_t::InitializeUniform(A);
      break;
   case EInitialization::kIdentity : Architecture_t::InitializeIdentity(A);
      break;
   case EInitialization::kZero     : Architecture_t::InitializeZero(A);
      break;
   case EInitialization::kGlorotNormal  : Architecture_t::InitializeGlorotNormal(A);
      break;
   case EInitialization::kGlorotUniform : Architecture_t::InitializeGlorotUniform(A);
      break;
   }
}

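/*! Example: a minimal sketch of initializing a freshly allocated weight
 *  matrix; the helper name initWeights is purely illustrative.
 *
 *  \code
 *  template <typename Architecture_t>
 *  void initWeights(typename Architecture_t::Matrix_t &W)
 *  {
 *     // Fills W via the backend's Glorot/Xavier uniform initializer.
 *     initialize<Architecture_t>(W, EInitialization::kGlorotUniform);
 *  }
 *  \endcode
 */
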
} // namespace DNN
} // namespace TMVA

#endif