Logo ROOT   6.07/09
Reference Guide
TestDerivatives.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Simon Pfreundschuh
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////
13 // Generic tests for the derivatives and gradients of activation,  //
14 // loss and regularization functions. Each function generates a    //
15 // random 10 x 10 matrix and uses a central finite difference      //
16 // to numerically compute the derivative of the function           //
17 // w.r.t. one element. The result is compared to the result        //
18 // obtained by the corresponding analytic derivative implemented   //
19 // by the evaluateDerivative(...), evaluateGradients(...),         //
20 // addRegularizationGradients(...) functions.                      //
21 //////////////////////////////////////////////////////////////////////
22 
23 #include <iostream>
24 #include "TMVA/DNN/Functions.h"
25 #include "TMVA/DNN/Net.h"
26 #include "Utility.h"
27 
28 using namespace TMVA::DNN;
29 
30 //______________________________________________________________________________
31 //
32 // Activation Functions
33 //______________________________________________________________________________
34 
35 /*! Generic function that numerically computes the derivative of a matrix
36  * function f and the analytical solution given by df the function signatures
37  * are assumed to be
38  * - void f(Matrix_t &X)
39  * - void df(Matrix_t &Y, const Matrix_t &X) -> derivative of f at X(i,j) is
40  * The function f is supposed to apply the corresponding mathematical function
41  * to each element in the provided matrix X. The function df is expected to
42  * set each element in Y to the derivative of the corresponding mathematical
43  * function evaluated at the corresponding element in X.
44  */
template<typename Architecture, typename F, typename dF>
auto testDerivatives(F f, dF df,
                     typename Architecture::Scalar_t dx)
-> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;
   using Matrix_t = typename Architecture::Matrix_t;

   Scalar_t maxError = 0.0;

   // Repeat on 100 independent random matrices and keep the worst
   // relative deviation between the analytic and numeric derivative.
   for (size_t rep = 0; rep < 100; ++rep) {
      Matrix_t work(10, 10), input(10, 10);
      randomMatrix(input);

      // Analytic derivative of f, evaluated elementwise at `input`;
      // only the (0,0) entry is probed below.
      df(work, input);
      Scalar_t analytic = work(0, 0);

      // Forward sample: f applied to input with (0,0) shifted by +dx.
      copyMatrix(work, input);
      work(0, 0) += dx;
      f(work);
      Scalar_t upper = work(0, 0);

      // Backward sample: f applied to input with (0,0) shifted by -dx.
      copyMatrix(work, input);
      work(0, 0) -= dx;
      f(work);
      Scalar_t lower = work(0, 0);

      // Central finite difference approximation of df at input(0,0).
      Scalar_t numeric = (upper - lower) / (2.0 * dx);
      maxError = std::max(maxError, relativeError(numeric, analytic));
   }

   return maxError;
}
78 
79 /*! Test derivatives of all activation functions and return the maximum relative
80  * error. Prints the result for each function to the stdout. */
81 //______________________________________________________________________________
82 template<typename Architecture>
84  -> typename Architecture::Scalar_t
85 {
86  using Scalar_t = typename Architecture::Scalar_t;
87  using Matrix_t = typename Architecture::Matrix_t;
88 
89  // Test only differentiable activation functions.
90  std::vector<EActivationFunction> EActivationFunctions
96 
97  Scalar_t error, maximum_error;
98  maximum_error = 0.0;
99 
100  for (auto & af : EActivationFunctions)
101  {
102  auto f = [& af](Matrix_t &X){ evaluate<Architecture>(X, af);};
103  auto df = [& af](Matrix_t &X, const Matrix_t &Y)
104  {
105  evaluateDerivative<Architecture>(X, af, Y);
106  };
107  error = testDerivatives<Architecture>(f, df, 5e-3);
108 
109  std::cout << "Testing " << static_cast<int>(af) << ": ";
110  std::cout << "Maximum Relative Error = " << error << std::endl;
111 
112  maximum_error = std::max(maximum_error, error);
113  }
114 
115  return maximum_error;
116 }
117 
118 //______________________________________________________________________________
119 //
120 // Loss functions.
121 //______________________________________________________________________________
122 
123 /*! Similar to testDerivatives only that here the mathematical function is
124  * expected to be a matrix functional, i.e. to be mapping a matrix to a
125  * scalar value. The scalar value is supposed to be computed by the provided
126  * function object f, while the function object is just like above. */
template<typename Architecture, typename F, typename dF>
auto testGradients(F f, dF df,
                   typename Architecture::Scalar_t dx)
-> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;
   using Matrix_t = typename Architecture::Matrix_t;

   Scalar_t maximum_error = 0.0;

   // Repeat on 100 random (Y, X) pairs and keep the worst deviation
   // between the analytic gradient and the central finite difference.
   for (size_t i = 0; i < 100; i++)
   {
      Matrix_t X(10,10), Y(10,10), Z(10,10);
      randomMatrix(X);
      randomMatrix(Y);

      // Analytic gradient of the functional f(Y, .) w.r.t. X; only the
      // (0,0) entry is probed below.
      df(Z, Y, X);
      Scalar_t dy = Z(0,0);

      // Central finite difference of the scalar functional w.r.t. X(0,0).
      X(0,0) += dx;
      Scalar_t y1 = f(Y,X);
      X(0,0) -= 2.0 * dx;
      Scalar_t y0 = f(Y,X);
      Scalar_t dy_num = (y1 - y0) / (2.0 * dx);

      // Relative error when the analytic gradient is non-zero,
      // absolute error otherwise.
      Scalar_t error = 0.0;
      if (std::fabs(dy) > 0)
      {
         error = std::fabs((dy_num - dy) / dy);
      }
      else
      {
         // Bug fix: take the magnitude of the deviation. Without
         // std::fabs a negative difference is ignored by std::max
         // below, hiding genuine disagreements.
         error = std::fabs(dy_num - dy);
      }

      maximum_error = std::max(maximum_error, error);
   }

   return maximum_error;
}
165 
166 /*! Test gradients of all loss function for the given architecture type and
167  * return the maximum relative error. Prints results for each function to
168  * standard out. */
169 //______________________________________________________________________________
170 template<typename Architecture>
172  -> typename Architecture::Scalar_t
173 {
174  using Scalar_t = typename Architecture::Scalar_t;
175  using Matrix_t = typename Architecture::Matrix_t;
176 
177  std::vector<ELossFunction> LossFunctions
180 
181  Scalar_t error, maximum_error;
182  maximum_error = 0.0;
183 
184  for (auto & lf : LossFunctions)
185  {
186  auto f = [lf](const Matrix_t &Y, const Matrix_t &Z)
187  {
188  return evaluate<Architecture>(lf, Y, Z);
189  };
190  auto df = [& lf](Matrix_t &X,
191  const Matrix_t &Y,
192  const Matrix_t &Z)
193  {
194  evaluateGradients<Architecture>(X, lf, Y, Z);
195  };
196 
197  error = testGradients<Architecture>(f, df, 5e-6);
198 
199  std::cout << "Testing " << static_cast<char>(lf) << ": ";
200  std::cout << "Maximum Relative Error = " << error << std::endl;
201 
202  maximum_error = std::max(maximum_error, error);
203  }
204 
205  return maximum_error;
206 }
207 
208 //______________________________________________________________________________
209 //
210 // Regularization.
211 //______________________________________________________________________________
212 
213 /*! Test the computation of gradients for all differentiable regularization types,
214  * which is so far only L2 and no regularization and print the results to standard
215  * out */
216 template<typename Architecture>
218  -> typename Architecture::Scalar_t
219 {
220  using Scalar_t = typename Architecture::Scalar_t;
221  using Matrix_t = typename Architecture::Matrix_t;
222 
223  std::vector<ERegularization> Regularizations
226 
227  Scalar_t error, maximum_error;
228  maximum_error = 0.0;
229 
230  for (auto & r : Regularizations)
231  {
232  auto f = [r](const Matrix_t & , const Matrix_t & Y)
233  {
234  return regularization<Architecture>(Y, r);
235  };
236  auto df = [& r](Matrix_t &X,
237  const Matrix_t & ,
238  const Matrix_t & Y)
239  {
240  applyMatrix(X, [](double){return 0.0;});
241  addRegularizationGradients<Architecture>(X, Y, (Scalar_t) 1.0, r);
242  };
243 
244  error = testGradients<Architecture>(f, df, 1.0);
245 
246  std::cout << "Testing " << static_cast<char>(r) << ": ";
247  std::cout << "Maximum Relative Error = " << error << std::endl;
248 
249  maximum_error = std::max(maximum_error, error);
250  }
251 
252  return maximum_error;
253 }
auto testLossFunctionGradients() -> typename Architecture::Scalar_t
Test gradients of all loss functions for the given architecture type and return the maximum relative error.
void randomMatrix(AMatrix &X)
Fill matrix with random, Gaussian-distributed values.
Definition: Utility.h:59
void applyMatrix(AMatrix &X, F f)
Apply functional to each element in the matrix.
Definition: Utility.h:103
auto testDerivatives(F f, dF df, typename Architecture::Scalar_t dx) -> typename Architecture::Scalar_t
Generic function that numerically computes the derivative of a matrix function f and the analytical s...
Definition: Blas.h:58
auto testActivationFunctionDerivatives() -> typename Architecture::Scalar_t
Test derivatives of all activation functions and return the maximum relative error.
#define F(x, y, z)
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
TRandom2 r(17)
void copyMatrix(AMatrix &X, const AMatrix &Y)
Generate a random batch as input for a neural net.
Definition: Utility.h:87
double f(double x)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
AFloat relativeError(const AFloat &x, const AFloat &y)
Compute the relative error of x and y normalized by y.
auto testGradients(F f, dF df, typename Architecture::Scalar_t dx) -> typename Architecture::Scalar_t
Similar to testDerivatives only that here the mathematical function is expected to be a matrix functi...
auto testRegularizationGradients() -> typename Architecture::Scalar_t
Test the computation of gradients for all differentiable regularization types, which is so far only L...