Logo ROOT   6.14/05
Reference Guide
Propagation.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 10/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////
13 // Implementation of the functions required for the forward and //
14 // backward propagation of activations through a neural network for //
15 // the reference implementation. //
16 //////////////////////////////////////////////////////////////////////
17 
20 
21 namespace TMVA {
22 namespace DNN {
23 
24 template <typename AFloat>
// Compute output = input * Weights^T with a single BLAS GEMM call.
// NOTE(review): the first signature line (original line 25) was lost in the
// HTML extraction; per the class reference it reads
//   MultiplyTranspose(TCpuMatrix &output, const TCpuMatrix &input, ...)
26  const TCpuMatrix<AFloat> &Weights)
27 {
28 
 // m x k = dimensions of input; n = rows of Weights (so Weights^T is k x n).
29  int m = (int)input.GetNrows();
30  int k = (int)input.GetNcols();
31  int n = (int)Weights.GetNrows();
32 
 // Dimension sanity checks: print a detailed diagnostic, then hard-assert.
33  if ((int)output.GetNrows() != m) {
34  Error("MultiplyTranspose","Invalid input - output rows - input: %d != output : %d",m, (int) output.GetNrows());
35  R__ASSERT((int) output.GetNrows() == m);
36  }
37  if ((int)output.GetNcols() != n) {
38  Error("MultiplyTranspose","Invalid output cols or weight rows - output cols: %d != weight rows : %d",(int) output.GetNcols(),n);
39  R__ASSERT((int) output.GetNcols() == n);
40  }
41  if ((int)Weights.GetNcols() != k) {
42  Error("MultiplyTranspose","Invalid input cols or weight cols - input cols: %d != weight cols : %d", k, (int) Weights.GetNcols());
43  R__ASSERT((int) Weights.GetNcols() == k);
44  }
45 
 // 'N' = use input as-is, 'T' = transpose Weights inside GEMM.
46  char transa = 'N';
47  char transb = 'T';
48 
 // Plain product: alpha = 1, beta = 0 (output is overwritten, not accumulated).
49  AFloat alpha = 1.0;
50  AFloat beta = 0.0;
51 
52  const AFloat *A = input.GetRawDataPointer();
53  const AFloat *B = Weights.GetRawDataPointer();
54  AFloat *C = output.GetRawDataPointer();
55 
 // C(m x n) = A(m x k) * B(n x k)^T; leading dimensions follow the
 // column-major storage of TCpuMatrix.
56  ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha, A, &m, B, &n, &beta, C, &m);
57 }
58 
59 template <typename AFloat>
// Add the bias vector to every row of `output`, implemented as the BLAS
// rank-1 update  output += ones * biases^T  (GER).
// NOTE(review): the signature line (original line 60) and original line 72
// were lost in the HTML extraction; per the class reference the signature is
//   AddRowWise(TCpuMatrix &output, const TCpuMatrix &biases).
61 {
62  int m = (int)output.GetNrows();
63  int n = (int)output.GetNcols();
64 
 // NOTE(review): `inc` is an int initialized from the double literal 1.0;
 // it truncates to 1 as intended, but should read `int inc = 1;`.
65  int inc = 1.0;
66  AFloat alpha = 1.0;
67 
68  AFloat *A = output.GetRawDataPointer();
 // x = vector of ones, y = the bias values.
69  const AFloat *x = TCpuMatrix<AFloat>::GetOnePointer();
70  const AFloat *y = biases.GetRawDataPointer();
71 
 // The bias matrix must provide at least n elements (one per output column).
73  R__ASSERT(n <= (int)(biases.GetNcols()*biases.GetNrows()));
74 
 // A += alpha * x * y^T  (outer product of ones and biases).
75  ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);
76 }
77 
78 template <typename AFloat>
79 void TCpu<AFloat>::Backward(TCpuMatrix<AFloat> &activationGradientsBackward, TCpuMatrix<AFloat> &weightGradients,
80  TCpuMatrix<AFloat> &biasGradients, TCpuMatrix<AFloat> &df,
81  const TCpuMatrix<AFloat> &activationGradients, const TCpuMatrix<AFloat> &weights,
82  const TCpuMatrix<AFloat> &activationsBackward)
83 {
84  // Compute element-wise product.
85  Hadamard(df, activationGradients);
86 
87  // Activation gradients.
88  if (activationGradientsBackward.GetNElements() > 0) Multiply(activationGradientsBackward, df, weights);
89 
90  // Weight gradients.
91  if (weightGradients.GetNElements() > 0) TransposeMultiply(weightGradients, df, activationsBackward);
92 
93  // Bias gradients.
94  if (biasGradients.GetNElements() > 0) SumColumns(biasGradients, df);
95 }
96 
97 //____________________________________________________________________________
98 template <typename AFloat>
99 void TCpu<AFloat>::Im2col(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t imgHeight, size_t imgWidth,
100  size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
101  size_t zeroPaddingHeight, size_t zeroPaddingWidth)
102 {
 // Transform B (presumably one image slice per row — confirm against callers)
 // into the "local view" matrix A: each row of A is one receptive field, so a
 // convolution becomes a plain matrix product. Pixels falling outside the
 // image (the zero-padding region) are written as 0.
103 
104  // image boundaries: last valid convolution-center coordinates
105  int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
106  int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
107  size_t currLocalView = 0;
108 
 // Half filter extents. For even filter sizes the "M1" variants differ by
 // one; they are used as the upper bounds of the window loops below.
109  const int halfFltHeight = fltHeight / 2;
110  const int halfFltWidth = fltWidth / 2;
111  const int halfFltHeightM1 = (fltHeight - 1) / 2;
112  const int halfFltWidthM1 = (fltWidth - 1) / 2;
113  const int nRowsInput = B.GetNrows();
114  const int nColsInput = B.GetNcols();
115  const int nRowsOutput = A.GetNrows();
116  const int nColsOutput = A.GetNcols();
117 
118  // convolution centers
119  for (int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
120  for (int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {
121  size_t currLocalViewPixel = 0;
122 
123  // within the local view
124  R__ASSERT((int) currLocalView < nRowsOutput );
125 
 // m runs over the rows of B; (k, l) over the filter window centred on (i, j).
126  for (int m = 0; m < nRowsInput; m++) {
127  for (int k = i - halfFltHeight ; k <= Int_t(i + halfFltHeightM1 ); k++) {
128  int kstep = k * imgWidth;
129  for (int l = j - halfFltWidth ; l <= Int_t(j + halfFltWidthM1); l++) {
130 
131  // Check the boundaries
132  R__ASSERT((int) currLocalViewPixel < nColsOutput );
133  //R__ASSERT(k * imgWidth + l < B.GetNcols());
 // Pixels outside the image contribute zero (zero padding).
134  if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
135  A(currLocalView, currLocalViewPixel++) = 0;
136  else
137  A(currLocalView, currLocalViewPixel++) = B(m, kstep + l);
138  }
139  }
140  }
141  //std::cout << " i " << i << " " << j << " increment currLocalView " << currLocalView << std::endl;
142  currLocalView++;
143  }
144  }
145  //PrintMatrix(A,"FromIm2Col");
146 }
147 
148 //____________________________________________________________________________
149 template <typename AFloat>
150 void TCpu<AFloat>::Im2colIndices(std::vector<int> &V, const TCpuMatrix<AFloat> &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth,
151  size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
152  size_t zeroPaddingHeight, size_t zeroPaddingWidth)
153 {
 // Precompute, once per geometry, the source-element index in B for every
 // (localViewPixel, localView) pair, so Im2colFast can do a plain gather.
 // Entries that map to zero-padding are stored as -1 (Im2colFast writes 0).
154 
155  // image boundaries: last valid convolution-center coordinates
156  int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
157  int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
158  size_t currLocalView = 0;
159 
 // Half filter extents; the "M1" variants bound the window loops below.
160  const int halfFltHeight = fltHeight / 2;
161  const int halfFltWidth = fltWidth / 2;
162  const int halfFltHeightM1 = (fltHeight - 1) / 2;
163  const int halfFltWidthM1 = (fltWidth - 1) / 2;
164  const int nRowsInput = B.GetNrows();
165  const int nColsInput = B.GetNcols();
166  const size_t nSizeOutput = V.size();
167  const int npixels = nRowsInput * fltHeight * fltWidth;
168  // const int nRowsOutput = A.GetNrows();
169  // const int nColsOutput = A.GetNcols();
170 
171  // convolution centers
172  for (int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
173  for (int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {
174  size_t currLocalViewPixel = 0;
175 
176  // within the local view
177  //R__ASSERT((int) currLocalView < nRowsOutput );
178 
179  for (int m = 0; m < nRowsInput; m++) {
180  for (int k = i - halfFltHeight ; k <= Int_t(i + halfFltHeightM1 ); k++) {
181  int kstep = k * imgWidth;
182  for (int l = j - halfFltWidth ; l <= Int_t(j + halfFltWidthM1); l++) {
183 
184  // Check the boundaries
185  //R__ASSERT(currLocalViewPixel < nColsOutput );
186  R__ASSERT(currLocalView * npixels + currLocalViewPixel < nSizeOutput );
 // V is laid out with the view index fastest: V[pixel * nLocalViews + view].
 // -1 flags a padding pixel.
187  if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
188  //V[currLocalView * npixels + currLocalViewPixel]=-1;
189  V[currLocalViewPixel * nLocalViews + currLocalView] = -1;
190  else
 // Raw (column-major) element index of B: column (kstep + l), row m.
191  V[currLocalViewPixel * nLocalViews + currLocalView]= ( kstep + l) * nRowsInput + m;
192 
193  currLocalViewPixel++;
194  }
195  }
196  }
197  currLocalView++;
198  }
199  }
200 }
201 template <typename AFloat>
202 void TCpu<AFloat>::Im2colFast(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, const std::vector<int> &V)
203 {
204  size_t n = V.size();
205  R__ASSERT( n == A.GetNcols() * A.GetNrows() );
206  AFloat * a = A.GetRawDataPointer();
207  const AFloat * b = B.GetRawDataPointer();
208 
209 //#define DL_USE_MTE
210  // parallel execution
211 #ifdef DL_USE_MTE
212  const size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(n);
213 
214  auto f = [&](UInt_t workerID)
215  {
216  for (size_t j = 0; j < nsteps; ++j) {
217  size_t ii = workerID+j;
218  if (ii >= n) break;
219  int idx = V[ii];
220  if (idx >= 0) a[ii] = b[idx];
221  else a[ii] = 0;
222  }
223  return 0;
224  };
225 
226  A.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,n,nsteps) );
227 
228 #else
229  //serial execution
230  for (size_t ii = 0; ii < n; ++ii) {
231  int idx = V[ii];
232  if (idx >= 0) a[ii] = b[idx];
233  else a[ii] = 0;
234  }
235 
236 #endif
237  // PrintMatrix(A,"FromFastIm2Col");
238  // PrintMatrix(B,"input to Im2Col");
239  // std::cout << "V vector " << V.size() << std::endl;
240  // for ( int i = 0; i < n; ++i) {
241  // std::cout << V[i] << " ";
242  // }
243  // std::cout << std::endl;
244 }
245 //____________________________________________________________________________
246 template <typename AFloat>
// Rotate the filter weights in B by 180 degrees (reverse the pixel order of
// each filter slice) and store them in A, as required by the backward-pass
// convolution.
// NOTE(review): the first signature line (original line 247) was lost in the
// HTML extraction; per the class reference it reads
//   RotateWeights(A, B, filterDepth, filterHeight, filterWidth, numFilters).
248  size_t filterHeight, size_t filterWidth, size_t numFilters)
249 {
 // jump = number of pixels in one filter slice.
250  size_t jump = filterHeight * filterWidth;
251  for (size_t j = 0; j < filterDepth; j++) {
252  for (size_t k = 0; k < numFilters; k++) {
253  for (size_t i = 0; i < jump; i++) {
 // Copy slice j of filter k with the pixel order reversed (180-degree rotation).
254  A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
255  //A(j, k * jump + i) = B(k, j * jump + i);
256  }
257  }
258  }
259 }
260 
261 //____________________________________________________________________________
262 template <typename AFloat>
// Add one bias per output row to every column of `output`, implemented as the
// BLAS rank-1 update  output += biases * ones^T  (GER) — the mirror image of
// AddRowWise, with the roles of x and y swapped.
// NOTE(review): the signature line (original line 263) and original line 276
// were lost in the HTML extraction; per the class reference the signature is
//   AddConvBiases(TCpuMatrix &output, const TCpuMatrix &biases).
264 {
265  int m = (int)output.GetNrows();
266  int n = (int)output.GetNcols();
267 
 // NOTE(review): `inc` is an int initialized from the double literal 1.0;
 // it truncates to 1 as intended, but should read `int inc = 1;`.
268  int inc = 1.0;
269  AFloat alpha = 1.0;
270 
271  AFloat *A = output.GetRawDataPointer();
 // x = the bias values, y = vector of ones.
272  const AFloat *x = biases.GetRawDataPointer();
273  const AFloat *y = TCpuMatrix<AFloat>::GetOnePointer();
274 
 // The bias matrix must provide at least m elements (one per output row).
275  R__ASSERT(m <= (int)biases.GetNElements() );
277 
 // A += alpha * x * y^T  (outer product of biases and ones).
278  ::TMVA::DNN::Blas::Ger(&m, &n, &alpha, x, &inc, y, &inc, A, &m);
279 }
280 
281 //____________________________________________________________________________
282 template <typename AFloat>
283 void TCpu<AFloat>::ConvLayerForward(std::vector<TCpuMatrix<AFloat>> & output, std::vector<TCpuMatrix<AFloat>> & derivatives,
284  const std::vector<TCpuMatrix<AFloat>> &input,
285  const TCpuMatrix<AFloat> & weights, const TCpuMatrix<AFloat> & biases,
286  EActivationFunction activFunc, const std::vector<int> & vIndices,
287  size_t nlocalViews, size_t nlocalViewPixels,
288  AFloat /* dropoutProbability */, bool /* applyDropout */)
289 {
 // Forward pass of the convolutional layer. The lambda below handles one
 // batch event: im2col the input via the precomputed index map, multiply with
 // the filter weights, add the biases and apply the activation function
 // (its derivative is evaluated as well, for later use in backpropagation).
290 
291  //TCpuMatrix<AFloat> inputTr(this->GetNLocalViews(), this->GetNLocalViewPixels());
292  //this should fix multi-thread initializations of arrays
 // Pre-size the shared ones-vector up front, before any worker uses it.
 // NOTE(review): original line 293 was lost in the HTML extraction.
294  TCpuMatrix<AFloat>::InitializeOneVector(output[0].GetNcols()); // since it is used in AddConvBiases
295 
296 
297  auto f = [&] (UInt_t i)
298  {
299  // dropout not yet implemented for CNN
300  // if (applyDropout && (dropoutProbability != 1.0)) {
301  // Dropout(input[i], dropoutProbability);
302  // }
303 
 // Per-event buffer for the im2col-transformed input (local to each invocation).
304  TCpuMatrix<AFloat> inputTr(nlocalViews, nlocalViewPixels);
305  //inputTr.Zero(); // this is not thread safe
306 
307  Im2colFast(inputTr, input[i], vIndices);
308 
309  MultiplyTranspose(output[i], weights, inputTr);
310  AddConvBiases(output[i], biases);
311 
 // The derivative is taken from the pre-activation values; evaluate() then
 // transforms output[i] in place.
312  evaluateDerivative<TCpu<AFloat>>(derivatives[i], activFunc, output[i]);
313  evaluate<TCpu<AFloat>>(output[i], activFunc);
314 
315  };
316 
 // NOTE(review): original line 317 — presumably the executor Foreach dispatch
 // of the lambda over the batch — was lost in the HTML extraction.
318 
319 }
320 //____________________________________________________________________________
321 template <typename AFloat>
322 void TCpu<AFloat>::ConvLayerBackward(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
323  TCpuMatrix<AFloat> &weightGradients, TCpuMatrix<AFloat> &biasGradients,
324  std::vector<TCpuMatrix<AFloat>> &df,
325  const std::vector<TCpuMatrix<AFloat>> &activationGradients,
326  const TCpuMatrix<AFloat> &weights,
327  const std::vector<TCpuMatrix<AFloat>> &activationsBackward, size_t batchSize,
328  size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width,
329  size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
330 {
331  // Update derivatives
332  // size_t m, n;
333  // m = activationGradients[0].GetNrows();
334  // n = activationGradients[0].GetNcols();
335 
336  for (size_t i = 0; i < batchSize; i++) {
337  // Compute element-wise product.
338  Hadamard(df[i], activationGradients[i]);
339  }
340 
341  // Calculate the activation gradients of the previous layer
342  CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,
343  height, width, filterDepth, filterHeight, filterWidth);
344 
345  // Calculate the weight gradients
346  CalculateConvWeightGradients(weightGradients, df, activationsBackward, batchSize, inputHeight, inputWidth, depth,
347  height, width, filterDepth, filterHeight, filterWidth, nLocalViews);
348 
349  // Calculate the bias gradients
350  CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);
351 }
352 
353 //____________________________________________________________________________
354 template <typename AFloat>
355 void TCpu<AFloat>::CalculateConvActivationGradients(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
356  const std::vector<TCpuMatrix<AFloat>> &df,
357  const TCpuMatrix<AFloat> &weights, size_t batchSize,
358  size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
359  size_t width, size_t filterDepth, size_t filterHeight,
360  size_t filterWidth)
361 {
 // Gradient of the loss w.r.t. the previous layer's activations, computed per
 // batch event as a convolution of df with the 180-degree-rotated weights.
362  if (activationGradientsBackward.size() == 0) return;
363 
364 
365  // Transform the weights
366 
367  //PrintMatrix(weights,"weights");
368  // filter depth must be same as input depth
369  TCpuMatrix<AFloat> rotWeights(filterDepth, depth * filterHeight * filterWidth);
370  RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());
371  //PrintMatrix(rotWeights,"rot-weights");
372 
373  // Calculate the zero paddings
374  size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
375  size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));
376 
377  // size_t tempZeroPaddingHeight = 1;
378  // size_t tempZeroPaddingWidth = 1;
379 
380  // Calculate the number of local views and the number of pixels in each view
381  size_t tempNLocalViews = inputHeight * inputWidth;
382  size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
383 
 // stride 1 is assumed for this backward convolution
384  size_t tempStrideRows = 1;
385  size_t tempStrideCols = 1;
386 
387  // An entire convolution follows
388 
 // The index map depends only on the geometry, so compute it once per batch.
389  std::vector<int> vIndices( tempNLocalViews * tempNLocalViewPixels );
390  Im2colIndices(vIndices, df[0], tempNLocalViews, height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
391  tempZeroPaddingHeight, tempZeroPaddingWidth);
392 
393 
394  //for (size_t i = 0; i < batchSize; i++) {
395  R__ASSERT(batchSize == df.size() );
396  R__ASSERT(batchSize == activationGradientsBackward.size() );
 // Per-event work item: gather df[i] into local-view form and multiply with
 // the rotated weights.
397  auto f = [&] (UInt_t i)
398  {
399 
400  // Im2col(dfTr, df[i], height, width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
401  // tempZeroPaddingHeight, tempZeroPaddingWidth);
402 
403  TCpuMatrix<AFloat> dfTr(tempNLocalViews, tempNLocalViewPixels);
404 
405  Im2colFast(dfTr, df[i], vIndices);
406 
407  //PrintMatrix(df[i],"df[i]");
408  //PrintMatrix(dfTr,"dfTr");
409 
410  MultiplyTranspose(activationGradientsBackward[i], rotWeights, dfTr);
411 
412  //PrintMatrix(activationGradientsBackward[i],"activGrad-result");
413 
414  };
415 
 // NOTE(review): original line 416 — presumably the executor Foreach dispatch
 // of the lambda over the batch — was lost in the HTML extraction.
417 }
418 
419 //____________________________________________________________________________
420 template <typename AFloat>
// Gradient of the loss w.r.t. the filter weights, accumulated over the batch:
// per event, multiply the deltas df[i] with the im2col-transformed backward
// activations, then sum the per-event results into weightGradients.
// NOTE(review): the first signature line (original line 421) was lost in the
// HTML extraction; per the class reference it reads
//   CalculateConvWeightGradients(weightGradients, df, activations_backward, ...).
422  const std::vector<TCpuMatrix<AFloat>> &df,
423  const std::vector<TCpuMatrix<AFloat>> &activationsBackward,
424  size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
425  size_t height, size_t width, size_t filterDepth, size_t filterHeight,
426  size_t filterWidth, size_t nLocalViews)
427 {
428  // reinitialize the weight gradients to 0
429  weightGradients.Zero();
430 
431  const size_t filterSize = filterHeight * filterWidth;
432  const size_t nLocalViewPixels = filterDepth * filterHeight * filterWidth;
433  R__ASSERT( weightGradients.GetNcols() == filterDepth * filterHeight * filterWidth);
434 
435  const size_t tempStrideRows = 1;
436  const size_t tempStrideCols = 1;
437 
438  // Calculate the zero paddings from the input height and width (assume stride =1 )
439  const size_t tempZeroPaddingHeight = (height - inputHeight + filterHeight - 1) / 2;
440  const size_t tempZeroPaddingWidth = (width - inputWidth + filterWidth - 1) / 2;
441 
442 
443  // convolution
444 
445 
446 
 // The index map depends only on the geometry, so compute it once per batch.
447  std::vector<int> vIndices(nLocalViews * nLocalViewPixels );
448  Im2colIndices(vIndices, activationsBackward[0], nLocalViews, inputHeight, inputWidth, filterHeight , filterWidth,
449  tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
450 
451  //std::cout << "do back-propagation in conv layer - compute weight gradient" << std::endl;
452 
 // One result matrix per event, filled by fmap; reduced serially below.
453  std::vector< TCpuMatrix<AFloat> > vres;//(batchSize);
454  for (size_t i = 0; i < batchSize; i++) {
455  vres.emplace_back(depth, nLocalViewPixels);
456  //PrintMatrix(df[i],"df");
457  //PrintMatrix(activationsBackward[i],"df");
458 
459  }
460 
461  auto fmap = [&](int i) {
462 
463  //PrintMatrix(df[i],"df-i");
464  TCpuMatrix<AFloat> xTr(nLocalViews, nLocalViewPixels);
465  TCpuMatrix<AFloat> res(depth, nLocalViewPixels);
466 
467  //computing the gradient is equivalent of doing a convolution of the input using as conv kernel the delta's (the df[] values)
468  //N.B. only stride values=1 are now supported
469 
470  //xTr.Zero();
471  // Im2col(xTr, const_cast<TCpuMatrix<AFloat> &>(activationsBackward[i]), inputHeight, inputWidth, filterHeight , filterWidth,
472  // tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
473  Im2colFast(xTr, activationsBackward[i], vIndices);
474 
475  //std::cout << "doing im2colfast" << std::endl;
476  //PrintMatrix(xTr,"xTr-i");
477  //PrintMatrix(activationsBackward[i],"actbackward-i");
478  Multiply(vres[i], df[i], xTr);
479  //PrintMatrix(vres[i],"res_ofMT");
480 
481  return;
482  //return res;
483  };
484 
 // NOTE(review): original line 485 — presumably the executor dispatch of fmap
 // over the batch (cf. the commented-out MapReduce at the bottom) — was lost
 // in the HTML extraction.
486 
487 // auto freduce = [&](const std::vector<TCpuMatrix<AFloat>> & vres) {
 // Serial reduction: accumulate every event's contribution into weightGradients.
488  R__ASSERT(vres.size() == batchSize);
489  for (size_t i = 0; i < batchSize; i++) {
490  //PrintMatrix(vres[i],"res");
491  for (size_t j = 0; j < depth; j++) {
492  for (size_t k = 0; k < filterDepth; k++) {
493  size_t kOffset = k * filterSize;
494  for (size_t l = 0; l < filterSize; l++) {
495  //weightGradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);
496  weightGradients(j, kOffset + l) += vres[i](j, kOffset + l);
497  }
498  }
499  }
500  // PrintMatrix(weightGradients,"weights_i");
501  }
502  // };
503 
504  //TCpuMatrix<AFloat>::GetThreadExecutor().MapReduce(fmap, ROOT::TSeqI( batchSize ) , freduce);
505  //PrintMatrix(weightGradients,"W-Grad");
506 }
507 
508 //____________________________________________________________________________
509 template <typename AFloat>
// Bias gradient: for each depth slice i, sum the deltas df over all local
// views and all batch events.
// NOTE(review): the signature line (original line 510) was lost in the HTML
// extraction; per the class reference it reads
//   CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews).
511  size_t batchSize, size_t depth, size_t nLocalViews)
512 {
513  for (size_t i = 0; i < depth; i++) {
514  AFloat sum = 0;
515  for (size_t j = 0; j < nLocalViews; j++) {
516  for (size_t k = 0; k < batchSize; k++) {
517  sum += df[k](i, j);
518  }
519  }
 // One accumulated value per depth slice, stored in column 0.
520  biasGradients(i, 0) = sum;
521  }
522 }
523 
524 //____________________________________________________________________________
525 template <typename AFloat>
// Max-pool downsample: write the window maxima of C into A, and the winning
// (flattened) pixel index of each maximum into B for use in the backward pass.
// NOTE(review): the first signature line (original line 526) was lost in the
// HTML extraction; per the class reference it reads
//   Downsample(A, B, C, imgHeight, imgWidth, fltHeight, fltWidth, strideRows, strideCols).
527  size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
528  size_t strideCols)
529 {
530  // image boundaries (no zero padding: pooling windows stay inside the image)
531  int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
532  int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
533  size_t currLocalView = 0;
534 
535  // centers
536  for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
537  for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
538  // within local views
 // m runs over the rows of C; each row is pooled independently.
539  for (int m = 0; m < (Int_t)C.GetNrows(); m++) {
 // Start from the lowest representable value so any pixel can win.
540  AFloat value = -std::numeric_limits<AFloat>::max();
541 
542  for (int k = i - fltHeight / 2; k <= Int_t(i + (fltHeight - 1) / 2); k++) {
543  for (int l = j - fltWidth / 2; l <= Int_t(j + (fltWidth - 1) / 2); l++) {
 // Track the running maximum and remember its flattened pixel index.
544  if (C(m, k * imgWidth + l) > value) {
545  value = C(m, k * imgWidth + l);
546  B(m, currLocalView) = k * imgWidth + l;
547  }
548  }
549  }
550  A(m, currLocalView) = value;
551  }
552  currLocalView++;
553  }
554  }
555 }
556 
557 //____________________________________________________________________________
558 template <typename AFloat>
559 void TCpu<AFloat>::MaxPoolLayerBackward(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
560  const std::vector<TCpuMatrix<AFloat>> &activationGradients,
561  const std::vector<TCpuMatrix<AFloat>> &indexMatrix, size_t batchSize,
562  size_t depth, size_t nLocalViews)
563 {
564  for (size_t i = 0; i < batchSize; i++) {
565  for (size_t j = 0; j < depth; j++) {
566 
567  // initialize to zeros
568  for (size_t t = 0; t < (size_t)activationGradientsBackward[i].GetNcols(); t++) {
569  activationGradientsBackward[i](j, t) = 0;
570  }
571 
572  // set values
573  for (size_t k = 0; k < nLocalViews; k++) {
574  AFloat grad = activationGradients[i](j, k);
575  size_t winningIdx = indexMatrix[i](j, k);
576  activationGradientsBackward[i](j, winningIdx) += grad;
577  }
578  }
579  }
580 }
581 
582 //____________________________________________________________________________
583 template <typename AFloat>
585 {
586  size_t nColsA = A.GetNcols();
587  size_t nColsB = B.GetNcols();
588 
589  for (size_t i = 0; i < A.GetNrows(); i++) {
590  for (size_t j = 0; j < A.GetNcols(); j++) {
591  size_t nElem = i * nColsA + j;
592  A(i, j) = B(nElem / nColsB, (nElem - 1) % nColsB);
593  }
594  }
595 }
596 
597 //____________________________________________________________________________
598 template <typename AFloat>
599 void TCpu<AFloat>::Flatten(TCpuMatrix<AFloat> &A, const std::vector<TCpuMatrix<AFloat>> &B, size_t size, size_t nRows,
600  size_t nCols)
601 {
602  for (size_t i = 0; i < (size_t)size; i++) {
603  for (size_t j = 0; j < (size_t)nRows; j++) {
604  for (size_t k = 0; k < (size_t)nCols; k++) {
605  A(i, j * nCols + k) = B[i](j, k);
606  }
607  }
608  }
609 }
610 
611 //____________________________________________________________________________
612 template <typename AFloat>
613 void TCpu<AFloat>::Deflatten(std::vector<TCpuMatrix<AFloat>> &A, const TCpuMatrix<AFloat> &B, size_t size, size_t nRows,
614  size_t nCols)
615 {
616  for (size_t i = 0; i < (size_t)size; i++) {
617  for (size_t j = 0; j < (size_t)nRows; j++) {
618  for (size_t k = 0; k < (size_t)nCols; k++) {
619  A[i](j, k) = B(i, j * nCols + k);
620  }
621  }
622  }
623 }
624 
625 //______________________________________________________________________________
626 template <typename AReal>
627 void TCpu<AReal>::Rearrange(std::vector<TCpuMatrix<AReal>> &out, const std::vector<TCpuMatrix<AReal>> &in)
628 {
629  // B x T x D out --- T x B x D in*/
630  size_t B = out.size();
631  size_t T = out[0].GetNrows();
632  size_t D = out[0].GetNcols();
633  if ((T != in.size()) || (B != in[0].GetNrows()) || (D != in[0].GetNcols())) {
634  std::cout << "Incompatible Dimensions\n"
635  << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
636  << D << "\n";
637  return;
638  }
639  for (size_t i = 0; i < B; ++i) {
640  for (size_t j = 0; j < T; ++j) {
641  for (size_t k = 0; k < D; ++k) {
642  out[i](j, k) = in[j](i, k);
643  }
644  }
645  }
646  return;
647 }
648 
649 } // namespace DNN
650 } // namespace TMVA
static void CalculateConvActivationGradients(std::vector< TCpuMatrix< Scalar_t >> &activationGradientsBackward, const std::vector< TCpuMatrix< Scalar_t >> &df, const TCpuMatrix< Scalar_t > &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
static void Im2col(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A...
Definition: Propagation.cxx:99
static double B[]
static long int sum(long int i)
Definition: Factory.cxx:2258
The TCpuMatrix class.
Definition: CpuMatrix.h:72
static void Rearrange(std::vector< TCpuMatrix< AReal >> &out, const std::vector< TCpuMatrix< AReal >> &in)
Rearrange data according to time: fill B x T x D out with T x B x D matrix in.
auto * m
Definition: textangle.C:8
static void MultiplyTranspose(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &weights)
Matrix-multiply input with the transpose of and write the results into output.
Definition: Propagation.cxx:25
static void RotateWeights(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which is representing a weights, and stores them in the matrix A...
static void Im2colIndices(std::vector< int > &V, const TCpuMatrix< AReal > &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
double T(double x)
Definition: ChebyshevPol.h:34
image html pict1_TGaxis_012 png width
Define new text attributes for the label number "labNum".
Definition: TGaxis.cxx:2551
void Ger(const int *m, const int *n, const Real_t *alpha, const Real_t *x, const int *incx, const Real_t *y, const int *incy, Real_t *A, const int *lda)
Add the outer product of x and y to the matrix A.
size_t GetNcols() const
Definition: CpuMatrix.h:127
static void AddConvBiases(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the biases in the Convolutional Layer.
static void Im2colFast(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, const std::vector< int > &V)
#define R__ASSERT(e)
Definition: TError.h:96
static void InitializeOneVector(size_t n)
Definition: CpuMatrix.cxx:87
#define f(i)
Definition: RSha256.hxx:104
int Int_t
Definition: RtypesCore.h:41
static size_t GetNWorkItems(size_t nelements)
Definition: CpuMatrix.h:164
static void CalculateConvBiasGradients(TCpuMatrix< Scalar_t > &biasGradients, const std::vector< TCpuMatrix< Scalar_t >> &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static double A[]
double beta(double x, double y)
Calculates the beta function.
static void AddRowWise(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
Definition: Propagation.cxx:60
size_t GetNElements() const
Definition: CpuMatrix.h:128
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
Double_t x[n]
Definition: legend1.C:17
static void Backward(TCpuMatrix< Scalar_t > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, TCpuMatrix< Scalar_t > &df, const TCpuMatrix< Scalar_t > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
Definition: Propagation.cxx:79
static void ConvLayerBackward(std::vector< TCpuMatrix< Scalar_t >> &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, std::vector< TCpuMatrix< Scalar_t >> &df, const std::vector< TCpuMatrix< Scalar_t >> &activationGradients, const TCpuMatrix< Scalar_t > &weights, const std::vector< TCpuMatrix< Scalar_t >> &activationBackward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void CalculateConvWeightGradients(TCpuMatrix< Scalar_t > &weightGradients, const std::vector< TCpuMatrix< Scalar_t >> &df, const std::vector< TCpuMatrix< Scalar_t >> &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer. ...
void Error(const char *location, const char *msgfmt,...)
static void MaxPoolLayerBackward(std::vector< TCpuMatrix< AReal >> &activationGradientsBackward, const std::vector< TCpuMatrix< AReal >> &activationGradients, const std::vector< TCpuMatrix< AReal >> &indexMatrix, size_t batchSize, size_t depth, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static double C[]
auto * a
Definition: textangle.C:12
unsigned int UInt_t
Definition: RtypesCore.h:42
double floor(double)
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:139
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:136
static void Reshape(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B)
Transform the matrix B to a matrix with different dimensions A.
static void ConvLayerForward(std::vector< TCpuMatrix< Scalar_t >> &output, std::vector< TCpuMatrix< Scalar_t >> &derivatives, const std::vector< TCpuMatrix< Scalar_t >> &input, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &biases, EActivationFunction func, const std::vector< int > &vIndices, size_t nlocalViews, size_t nlocalViewPixels, Scalar_t dropoutProbability, bool applyDropout)
Forward propagation in the Convolutional layer.
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Double_t y[n]
Definition: legend1.C:17
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
Definition: CpuMatrix.h:86
void Zero()
Clear content of the matrix and initialize to zero elements.
Definition: CpuMatrix.h:233
Abstract ClassifierFactory template that handles arbitrary types.
static void Downsample(TCpuMatrix< AReal > &A, TCpuMatrix< AReal > &B, const TCpuMatrix< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
static void Flatten(TCpuMatrix< AReal > &A, const std::vector< TCpuMatrix< AReal >> &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix, is stretched in one row, resulting with a matrix A...
auto * l
Definition: textangle.C:4
size_t GetNrows() const
Definition: CpuMatrix.h:126
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
EActivationFunction
Enum that represents layer activation functions.
Definition: Functions.h:31
static void Deflatten(std::vector< TCpuMatrix< AReal >> &A, const TCpuMatrix< AReal > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor A.
const Int_t n
Definition: legend1.C:16