Logo ROOT   6.07/09
Reference Guide
Functions.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 20/06/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /////////////////////////////////////////////////////////////////////
13 // Contains function enums for activation and output functions, as //
14 // well as generic evaluation functions, that delegate the call to //
15 // the corresponding evaluation kernel. //
16 /////////////////////////////////////////////////////////////////////
17 
18 #ifndef TMVA_DNN_FUNCTIONS
19 #define TMVA_DNN_FUNCTIONS
20 
21 namespace TMVA
22 {
23 namespace DNN
24 {
25 //______________________________________________________________________________
26 //
27 // Enum Definitions
28 //______________________________________________________________________________
29 
/*! Enum that represents layer activation functions. */
enum class EActivationFunction
{
   kIdentity = 0,
   kRelu     = 1,
   kSigmoid  = 2,
   kTanh     = 3,
   kSymmRelu = 4,
   kSoftSign = 5,
   kGauss    = 6
};
41 
/*! Enum that represents output functions, i.e. the function applied to
 *  the activations of the last layer to obtain the network output. */
enum class EOutputFunction
{
   kIdentity = 'I',
   kSigmoid  = 'S'
};
48 
/*! Enum that represents objective functions for the net, i.e. functions
 *  that take the output from the last layer in the net together with the
 *  truths and return the objective function value that is to be minimized
 *  in the training process. */
enum class ELossFunction
{
   kCrossEntropy     = 'C',
   kMeanSquaredError = 'R'
};
58 
/*! Enum representing the regularization type applied for a given layer. */
enum class ERegularization
{
   kNone = '0',
   kL1   = '1',
   kL2   = '2'
};
66 
/*! Enum representing the initialization method used for this layer. */
enum class EInitialization {
   kGauss    = 'G',
   kUniform  = 'U',
   kIdentity = 'I',
   kZero     = 'Z'
};
74 
75 //______________________________________________________________________________
76 //
77 // Activation Functions
78 //______________________________________________________________________________
79 
80 /*! Apply the given activation function to each value in the given
81 * matrix A. */
82 template<typename Architecture_t>
83 inline void evaluate(typename Architecture_t::Matrix_t &A,
85 {
86  switch(f)
87  {
88  case EActivationFunction::kIdentity : break;
89  case EActivationFunction::kRelu : Architecture_t::Relu(A);
90  break;
92  break;
94  break;
95  case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A);
96  break;
98  break;
100  break;
101  }
102 }
103 
104 
105 /*! Compute the first partial derivative of the activation function for
106 * the values given in matrix A and write the results into B. */
107 //______________________________________________________________________________
108 template<typename Architecture_t>
109 inline void evaluateDerivative(typename Architecture_t::Matrix_t & B,
111  const typename Architecture_t::Matrix_t & A)
112 {
113  switch(f)
114  {
115  case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);
116  break;
117  case EActivationFunction::kRelu : Architecture_t::ReluDerivative(B, A);
118  break;
119  case EActivationFunction::kSigmoid : Architecture_t::SigmoidDerivative(B, A);
120  break;
121  case EActivationFunction::kTanh : Architecture_t::TanhDerivative(B, A);
122  break;
123  case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A);
124  break;
125  case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);
126  break;
127  case EActivationFunction::kGauss : Architecture_t::GaussDerivative(B, A);
128  break;
129  }
130 }
131 
132 //______________________________________________________________________________
133 //
134 // Output Functions
135 //______________________________________________________________________________
136 
137 /*! Apply the given output function to each value in the given
138 * matrix A. */
139 template<typename Architecture_t>
140 inline void evaluate(typename Architecture_t::Matrix_t &A,
142  const typename Architecture_t::Matrix_t &X)
143 {
144  switch(f)
145  {
147  break;
149  break;
150  }
151 }
152 
153 //______________________________________________________________________________
154 //
155 // Loss Functions
156 //______________________________________________________________________________
157 
158 /*! Compute the value of the objective function f for given activations
159 * of the ouput layer and the truth Y. */
160 template<typename Architecture_t>
162  const typename Architecture_t::Matrix_t & Y,
163  const typename Architecture_t::Matrix_t & output)
164 -> decltype(Architecture_t::CrossEntropy(Y,output))
165 {
166  switch(f)
167  {
169  return Architecture_t::CrossEntropy(Y, output);
171  return Architecture_t::MeanSquaredError(Y, output);
172  }
173  return 0.0;
174 }
175 
176 /*! Compute the gradient of the given output function f for given activations
177 * output of the output layer and truth Y and write the results into dY. */
178 //______________________________________________________________________________
179 template<typename Architecture_t>
180 inline void evaluateGradients(typename Architecture_t::Matrix_t & dY,
182  const typename Architecture_t::Matrix_t &Y,
183  const typename Architecture_t::Matrix_t &output)
184 {
185  switch(f)
186  {
188  Architecture_t::CrossEntropyGradients(dY, Y, output);
189  break;
191  Architecture_t::MeanSquaredErrorGradients(dY, Y, output);
192  break;
193  }
194 }
195 
196 
197 //______________________________________________________________________________
198 //
199 // Regularization
200 //______________________________________________________________________________
201 
202 /*! Evaluate the regularization functional for a given weight matrix. */
203 template<typename Architecture_t>
204 inline auto regularization(const typename Architecture_t::Matrix_t &A,
206 -> decltype(Architecture_t::L1Regularization(A))
207 {
208  switch(R)
209  {
211  return 0.0;
212  case ERegularization::kL1 :
213  return Architecture_t::L1Regularization(A);
214  case ERegularization::kL2 :
215  return Architecture_t::L2Regularization(A);
216  }
217  return 0.0;
218 }
219 
220 /*! Add the regularization gradient corresponding to weight matrix W, to
221 * the matrix A. */
222 //______________________________________________________________________________
223 template<typename Architecture_t>
224 inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
225  const typename Architecture_t::Matrix_t &W,
226  typename Architecture_t::Scalar_t weightDecay,
228 {
229  switch(R)
230  {
232  break;
233  case ERegularization::kL1 :
234  Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
235  break;
236  case ERegularization::kL2 :
237  Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
238  break;
239  }
240 }
241 
242 //______________________________________________________________________________
243 //
244 // Initialization
245 //______________________________________________________________________________
246 
247 template<typename Architecture_t>
248 inline void initialize(typename Architecture_t::Matrix_t & A,
250 {
251  switch(m) {
252  case EInitialization::kGauss : Architecture_t::InitializeGauss(A);
253  break;
254  case EInitialization::kUniform : Architecture_t::InitializeUniform(A);
255  break;
256  case EInitialization::kIdentity : Architecture_t::InitializeIdentity(A);
257  break;
258  case EInitialization::kZero : Architecture_t::InitializeZero(A);
259  break;
260  }
261 }
262 
263 } // namespace DNN
264 } // namespace TMVA
265 
266 #endif
static double B[]
void evaluateDerivative(typename Architecture_t::Matrix_t &B, EActivationFunction f, const typename Architecture_t::Matrix_t &A)
Compute the first partial derivative of the activation function for the values given in matrix A and ...
Definition: Functions.h:109
static std::shared_ptr< std::function< double(double)> > Tanh
Definition: NeuralNet.icc:50
static double A[]
void evaluate(typename Architecture_t::Matrix_t &A, EActivationFunction f)
Apply the given activation function to each value in the given matrix A.
Definition: Functions.h:83
EInitialization
Definition: Functions.h:68
static std::shared_ptr< std::function< double(double)> > Sigmoid
Definition: NeuralNet.icc:47
void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f, const typename Architecture_t::Matrix_t &Y, const typename Architecture_t::Matrix_t &output)
Compute the gradient of the given output function f for given activations output of the output layer ...
Definition: Functions.h:180
double weightDecay(double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
compute the weight decay for regularization (L1 or L2)
Definition: NeuralNet.icc:491
TMarker * m
Definition: textangle.C:8
auto regularization(const typename Architecture_t::Matrix_t &A, ERegularization R) -> decltype(Architecture_t::L1Regularization(A))
Evaluate the regularization functional for a given weight matrix.
Definition: Functions.h:204
static std::shared_ptr< std::function< double(double)> > SoftSign
Definition: NeuralNet.icc:68
void Copy(void *source, void *dest)
void addRegularizationGradients(typename Architecture_t::Matrix_t &A, const typename Architecture_t::Matrix_t &W, typename Architecture_t::Scalar_t weightDecay, ERegularization R)
Add the regularization gradient corresponding to weight matrix W, to the matrix A.
Definition: Functions.h:224
double f(double x)
EOutputFunction
Enum that represents output functions.
Definition: Functions.h:43
ELossFunction
Enum that represents objective functions for the net, i.e.
Definition: Functions.h:53
static std::shared_ptr< std::function< double(double)> > Gauss
Definition: NeuralNet.icc:71
Abstract ClassifierFactory template that handles arbitrary types.
ERegularization
Enum representing the regularization type applied for a given layer.
Definition: Functions.h:60
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:31
static void output(int code)
Definition: gifencode.c:226
void initialize(typename Architecture_t::Matrix_t &A, EInitialization m)
Definition: Functions.h:248
TRandom3 R
a TMatrixD.
Definition: testIO.cxx:28