Logo ROOT   6.10/09
Reference Guide
TestDerivatives.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Simon Pfreundschuh
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////
13 // Generic tests for the derivatives and gradients of activation, //
14 // loss and regularization functions. Each function generates a //
15 // random 10 x 10 matrix and uses a central finite difference //
16 // to numerically compute the derivative of the function //
17 // w.r.t. one element. The result is compared to the result //
18 // obtained by the corresponding analytic derivative implemented by //
19 // the evaluateDerivative(...), evaluateGradients(...), //
20 // addRegularizationGradients(...) functions. //
21 //////////////////////////////////////////////////////////////////////
22 
23 #include <iostream>
24 #include "TMVA/DNN/Functions.h"
25 #include "TMVA/DNN/Net.h"
26 #include "Utility.h"
27 
28 using namespace TMVA::DNN;
29 
30 //______________________________________________________________________________
31 //
32 // Activation Functions
33 //______________________________________________________________________________
34 
/*! Generic routine that checks the analytic derivative of a matrix function
 * against a central finite difference. The expected signatures are
 * - void f(Matrix_t &X)
 * - void df(Matrix_t &Y, const Matrix_t &X)
 * where f applies the corresponding mathematical function to every element
 * of X, and df writes into Y the derivative of that function evaluated at
 * each element of X. Returns the maximum relative error between the numeric
 * (step width dx) and the analytic derivative at element (0,0), over 100
 * randomly drawn 10 x 10 matrices.
 */
template<typename Architecture, typename F, typename dF>
auto testDerivatives(F f, dF df,
                     typename Architecture::Scalar_t dx)
-> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;
   using Matrix_t = typename Architecture::Matrix_t;

   Scalar_t worstError = 0.0;

   for (size_t trial = 0; trial < 100; trial++) {
      Matrix_t work(10,10), input(10,10);
      randomMatrix(input);

      // Analytic derivative evaluated at input(0,0).
      df(work, input);
      Scalar_t analytic = work(0,0);

      // Function value at input(0,0) + dx ...
      copyMatrix(work, input);
      work(0,0) += dx;
      f(work);
      Scalar_t yPlus = work(0,0);

      // ... and at input(0,0) - dx.
      copyMatrix(work, input);
      work(0,0) -= dx;
      f(work);
      Scalar_t yMinus = work(0,0);

      // Central finite difference approximation of the derivative.
      Scalar_t numeric = (yPlus - yMinus) / (2.0 * dx);
      worstError = std::max(worstError, relativeError(numeric, analytic));
   }

   return worstError;
}
78 
79 /*! Test derivatives of all activation functions and return the maximum relative
80  * error. Prints the result for each function to the stdout. */
81 //______________________________________________________________________________
82 template<typename Architecture>
84  -> typename Architecture::Scalar_t
85 {
86  using Scalar_t = typename Architecture::Scalar_t;
87  using Matrix_t = typename Architecture::Matrix_t;
88 
89  // Test only differentiable activation functions.
90  std::vector<EActivationFunction> EActivationFunctions
96 
97  Scalar_t error, maximum_error;
98  maximum_error = 0.0;
99 
100  for (auto & af : EActivationFunctions)
101  {
102  auto f = [& af](Matrix_t &X){ evaluate<Architecture>(X, af);};
103  auto df = [& af](Matrix_t &X, const Matrix_t &Y)
104  {
105  evaluateDerivative<Architecture>(X, af, Y);
106  };
107  error = testDerivatives<Architecture>(f, df, 1.0e-04);
108 
109  std::cout << "Testing " << static_cast<int>(af) << ": ";
110  std::cout << "Maximum Relative Error = " << error << std::endl;
111 
112  maximum_error = std::max(maximum_error, error);
113  }
114 
115  return maximum_error;
116 }
117 
118 //______________________________________________________________________________
119 //
120 // Loss functions.
121 //______________________________________________________________________________
122 
/*! Similar to testDerivatives, only that here the mathematical function is
 * expected to be a matrix functional, i.e. to map a matrix to a scalar. The
 * scalar value is computed by the provided function object f, while df fills
 * its first argument with the analytic gradient. Returns the maximum error
 * between the analytic gradient at element (0,0) and a central finite
 * difference with step width dx, over 100 random trials. */
template<typename Architecture, typename F, typename dF>
auto testGradients(F f, dF df,
                   typename Architecture::Scalar_t dx)
-> typename Architecture::Scalar_t
{
   using Scalar_t = typename Architecture::Scalar_t;
   using Matrix_t = typename Architecture::Matrix_t;

   Scalar_t maximum_error = 0.0;

   for (size_t i = 0; i < 100; i++)
   {
      Matrix_t X(10,10), Y(10,10), Z(10,10);
      randomMatrix(X);
      randomMatrix(Y);

      // Analytic gradient w.r.t. X(0,0), written into Z by df.
      df(Z, Y, X);
      Scalar_t dy = Z(0,0);

      // Central finite difference: evaluate at X(0,0) + dx and X(0,0) - dx.
      X(0,0) += dx;
      Scalar_t y1 = f(Y,X);
      X(0,0) -= 2.0 * dx;
      Scalar_t y0 = f(Y,X);
      Scalar_t dy_num = (y1 - y0) / (2.0 * dx);

      // Relative error where the analytic gradient is non-zero, absolute
      // error otherwise.
      Scalar_t error = 0.0;
      if (std::fabs(dy) > 0)
      {
         error = std::fabs((dy_num - dy) / dy);
      }
      else
      {
         // BUGFIX: take the absolute value here; a negative difference
         // would otherwise be silently discarded by the std::max below.
         error = std::fabs(dy_num - dy);
      }

      maximum_error = std::max(maximum_error, error);
   }

   return maximum_error;
}
165 
166 /*! Test gradients of all loss function for the given architecture type and
167  * return the maximum relative error. Prints results for each function to
168  * standard out. */
169 //______________________________________________________________________________
170 template<typename Architecture>
172  -> typename Architecture::Scalar_t
173 {
174  using Scalar_t = typename Architecture::Scalar_t;
175  using Matrix_t = typename Architecture::Matrix_t;
176 
177  std::vector<ELossFunction> LossFunctions
181 
182  Scalar_t error, maximum_error;
183  maximum_error = 0.0;
184 
185  for (auto & lf : LossFunctions)
186  {
187  auto f = [lf](const Matrix_t &Y, const Matrix_t &Z)
188  {
189  return evaluate<Architecture>(lf, Y, Z);
190  };
191  auto df = [& lf](Matrix_t &X,
192  const Matrix_t &Y,
193  const Matrix_t &Z)
194  {
195  evaluateGradients<Architecture>(X, lf, Y, Z);
196  };
197 
198  error = testGradients<Architecture>(f, df, 5e-6);
199 
200  std::cout << "Testing " << static_cast<char>(lf) << ": ";
201  std::cout << "Maximum Relative Error = " << error << std::endl;
202 
203  maximum_error = std::max(maximum_error, error);
204  }
205 
206  return maximum_error;
207 }
208 
209 //______________________________________________________________________________
210 //
211 // Regularization.
212 //______________________________________________________________________________
213 
214 /*! Test the computation of gradients for all differentiable regularization types,
215  * which is so far only L2 and no regularization and print the results to standard
216  * out */
217 template<typename Architecture>
219  -> typename Architecture::Scalar_t
220 {
221  using Scalar_t = typename Architecture::Scalar_t;
222  using Matrix_t = typename Architecture::Matrix_t;
223 
224  std::vector<ERegularization> Regularizations
227 
228  Scalar_t error, maximum_error;
229  maximum_error = 0.0;
230 
231  for (auto & r : Regularizations)
232  {
233  auto f = [r](const Matrix_t & , const Matrix_t & Y)
234  {
235  return regularization<Architecture>(Y, r);
236  };
237  auto df = [& r](Matrix_t &X,
238  const Matrix_t & ,
239  const Matrix_t & Y)
240  {
241  applyMatrix(X, [](double){return 0.0;});
242  addRegularizationGradients<Architecture>(X, Y, (Scalar_t) 1.0, r);
243  };
244 
245  error = testGradients<Architecture>(f, df, 1.0);
246 
247  std::cout << "Testing " << static_cast<char>(r) << ": ";
248  std::cout << "Maximum Relative Error = " << error << std::endl;
249 
250  maximum_error = std::max(maximum_error, error);
251  }
252 
253  return maximum_error;
254 }
auto testLossFunctionGradients() -> typename Architecture::Scalar_t
Test gradients of all loss function for the given architecture type and return the maximum relative e...
void randomMatrix(AMatrix &X)
Fill matrix with random, Gaussian-distributed values.
Definition: Utility.h:60
void applyMatrix(AMatrix &X, F f)
Apply functional to each element in the matrix.
Definition: Utility.h:104
auto testDerivatives(F f, dF df, typename Architecture::Scalar_t dx) -> typename Architecture::Scalar_t
Generic function that numerically computes the derivative of a matrix function f and the analytical s...
Definition: Blas.h:58
auto testActivationFunctionDerivatives() -> typename Architecture::Scalar_t
Test derivatives of all activation functions and return the maximum relative error.
#define F(x, y, z)
VecExpr< UnaryOp< Fabs< T >, VecExpr< A, T, D >, T >, T, D > fabs(const VecExpr< A, T, D > &rhs)
TRandom2 r(17)
void copyMatrix(AMatrix &X, const AMatrix &Y)
Generate a random batch as input for a neural net.
Definition: Utility.h:88
T relativeError(const T &x, const T &y)
Compute the relative error of x and y.
Definition: Utility.h:179
double f(double x)
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
auto testGradients(F f, dF df, typename Architecture::Scalar_t dx) -> typename Architecture::Scalar_t
Similar to testDerivatives only that here the mathematical function is expected to be a matrix functi...
auto testRegularizationGradients() -> typename Architecture::Scalar_t
Test the computation of gradients for all differentiable regularization types, which is so far only L...