template <typename AFloat>
void TCpu<AFloat>::MultiplyTranspose(TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &input,
                                     const TCpuMatrix<AFloat> &Weights)
{
   int m = (int)input.GetNrows();
   int k = (int)input.GetNcols();
   int n = (int)Weights.GetNrows();

   // Dimension checks: output must be (m x n) and Weights must be (n x k).
   if ((int)output.GetNrows() != m)
      Error("MultiplyTranspose", "Invalid input - output rows - input: %d != output : %d", m, (int)output.GetNrows());
   if ((int)output.GetNcols() != n)
      Error("MultiplyTranspose", "Invalid output cols or weight rows - output cols: %d != weight rows : %d",
            (int)output.GetNcols(), n);
   if ((int)Weights.GetNcols() != k)
      Error("MultiplyTranspose", "Invalid input cols or weight cols - input cols: %d != weight cols : %d", k,
            (int)Weights.GetNcols());

   char transa = 'N';
   char transb = 'T';
   AFloat alpha = 1.0;
   AFloat beta = 0.0;

   const AFloat *A = input.GetRawDataPointer();
   const AFloat *B = Weights.GetRawDataPointer();
   AFloat *C = output.GetRawDataPointer();

   ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha, A, &m, B, &n, &beta, C, &m);
}
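TCpuMatrix stores its elements contiguously in column-major order, so the Gemm call above computes output = input x Weights^T with the leading dimensions set to the respective row counts. The following standalone sketch (plain std::vector, hypothetical names, no TMVA types) writes the same contraction as explicit loops, which can be handy for checking the convention.

// Illustration only: naive column-major equivalent of Gemm('N','T', m, n, k, ...).
// All names here are local to this sketch and are not part of the TMVA API.
#include <vector>

void multiplyTransposeNaive(const std::vector<double> &input,   // m x k, column-major
                            const std::vector<double> &weights, // n x k, column-major
                            std::vector<double> &output,        // m x n, column-major
                            int m, int n, int k)
{
   for (int col = 0; col < n; ++col) {
      for (int row = 0; row < m; ++row) {
         double acc = 0.0;
         for (int p = 0; p < k; ++p) {
            // input(row, p) * weights(col, p)  ==  (input * weights^T)(row, col)
            acc += input[p * m + row] * weights[p * n + col];
         }
         output[col * m + row] = acc;
      }
   }
}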
template <typename AFloat>
// AddRowWise: add the biases row-wise to the output matrix (definition elided in this excerpt).
template <typename AFloat>
void TCpu<AFloat>::Backward(TCpuMatrix<AFloat> &activationGradientsBackward, TCpuMatrix<AFloat> &weightGradients,
                            TCpuMatrix<AFloat> &biasGradients, TCpuMatrix<AFloat> &df,
                            const TCpuMatrix<AFloat> &activationGradients, const TCpuMatrix<AFloat> &weights,
                            const TCpuMatrix<AFloat> &activationsBackward)
{
   // Element-wise product of the activation derivatives with the incoming gradients.
   Hadamard(df, activationGradients);

   // Gradients w.r.t. the activations of the previous layer.
   if (activationGradientsBackward.GetNElements() > 0) Multiply(activationGradientsBackward, df, weights);

   // Gradients w.r.t. the weights.
   if (weightGradients.GetNElements() > 0) TransposeMultiply(weightGradients, df, activationsBackward);

   // Gradients w.r.t. the biases.
   if (biasGradients.GetNElements() > 0) SumColumns(biasGradients, df);
}
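In matrix form the three products above are dX_prev = df * W, dW = df^T * X_prev, and db_j = sum over events of df(i, j). A minimal standalone sketch of the same update with plain nested loops (hypothetical names, row-major arrays for readability, not the TMVA API):

// Illustration only: dense-layer backward pass on plain 2D arrays.
#include <vector>
using Matrix = std::vector<std::vector<double>>;

void denseBackwardNaive(const Matrix &df,        // batch x nOut, already multiplied by f'(z)
                        const Matrix &x,         // batch x nIn, activations of previous layer
                        const Matrix &w,         // nOut x nIn
                        Matrix &dxPrev,          // batch x nIn
                        Matrix &dw,              // nOut x nIn
                        std::vector<double> &db) // nOut
{
   size_t batch = df.size(), nOut = w.size(), nIn = w[0].size();
   for (size_t i = 0; i < batch; ++i)
      for (size_t j = 0; j < nIn; ++j) {
         dxPrev[i][j] = 0;
         for (size_t o = 0; o < nOut; ++o) dxPrev[i][j] += df[i][o] * w[o][j]; // df * W
      }
   for (size_t o = 0; o < nOut; ++o) {
      db[o] = 0;
      for (size_t i = 0; i < batch; ++i) db[o] += df[i][o];                    // column sums
      for (size_t j = 0; j < nIn; ++j) {
         dw[o][j] = 0;
         for (size_t i = 0; i < batch; ++i) dw[o][j] += df[i][o] * x[i][j];    // df^T * X
      }
   }
}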
template <typename AFloat>
void TCpu<AFloat>::Im2col(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t imgHeight, size_t imgWidth,
                          size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
                          size_t zeroPaddingHeight, size_t zeroPaddingWidth)
{
   // Upper bounds (inclusive) for the centers of the local views.
   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;

   const int halfFltHeight = fltHeight / 2;
   const int halfFltWidth = fltWidth / 2;
   const int halfFltHeightM1 = (fltHeight - 1) / 2;
   const int halfFltWidthM1 = (fltWidth - 1) / 2;
   const int nRowsInput = B.GetNrows();
   const int nColsInput = B.GetNcols();
   const int nRowsOutput = A.GetNrows();
   const int nColsOutput = A.GetNcols();

   // Loop over the centers of the local views.
   for (int i = halfFltHeight - zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
      for (int j = halfFltWidth - zeroPaddingWidth; j <= imgWidthBound; j += strideCols) {
         size_t currLocalViewPixel = 0;

         R__ASSERT((int)currLocalView < nRowsOutput);

         // Loop over the pixels of one local view, for all input channels.
         for (int m = 0; m < nRowsInput; m++) {
            for (int k = i - halfFltHeight; k <= Int_t(i + halfFltHeightM1); k++) {
               int kstep = k * imgWidth;
               for (int l = j - halfFltWidth; l <= Int_t(j + halfFltWidthM1); l++) {

                  R__ASSERT((int)currLocalViewPixel < nColsOutput);
                  // Pixels that fall outside the (padded) image contribute zero.
                  if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
                     A(currLocalView, currLocalViewPixel++) = 0;
                  else
                     A(currLocalView, currLocalViewPixel++) = B(m, kstep + l);
               }
            }
         }
         currLocalView++;
      }
   }
}
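As a concrete illustration of the layout produced by Im2col (a worked example with made-up numbers, not from the source): with one input channel, a 3x3 image, a 2x2 filter, stride 1 and no padding there are four local views, and A becomes a 4x4 matrix whose row v lists the pixels of view v. A self-contained sketch:

// Illustration only: im2col of a single-channel 3x3 image with a 2x2 filter,
// stride 1, no padding. Produces a (nLocalViews x filterSize) = (4 x 4) matrix.
#include <cstdio>

int main()
{
   const int H = 3, W = 3, FH = 2, FW = 2;
   double img[H * W] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; // row-major: img[r * W + c]
   double A[(H - FH + 1) * (W - FW + 1)][FH * FW];

   int view = 0;
   for (int r = 0; r + FH <= H; ++r)
      for (int c = 0; c + FW <= W; ++c, ++view)
         for (int kr = 0; kr < FH; ++kr)
            for (int kc = 0; kc < FW; ++kc)
               A[view][kr * FW + kc] = img[(r + kr) * W + (c + kc)];

   // Expected rows: {1,2,4,5}, {2,3,5,6}, {4,5,7,8}, {5,6,8,9}
   for (int v = 0; v < view; ++v) {
      for (int p = 0; p < FH * FW; ++p) printf("%g ", A[v][p]);
      printf("\n");
   }
   return 0;
}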
template <typename AFloat>
void TCpu<AFloat>::Im2colIndices(std::vector<int> &V, const TCpuMatrix<AFloat> &B, size_t nLocalViews,
                                 size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth,
                                 size_t strideRows, size_t strideCols, size_t zeroPaddingHeight,
                                 size_t zeroPaddingWidth)
{
   int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;

   const int halfFltHeight = fltHeight / 2;
   const int halfFltWidth = fltWidth / 2;
   const int halfFltHeightM1 = (fltHeight - 1) / 2;
   const int halfFltWidthM1 = (fltWidth - 1) / 2;
   const int nRowsInput = B.GetNrows();
   const int nColsInput = B.GetNcols();
   const size_t nSizeOutput = V.size();
   const int npixels = nRowsInput * fltHeight * fltWidth;

   // Loop over the centers of the local views.
   for (int i = halfFltHeight - zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
      for (int j = halfFltWidth - zeroPaddingWidth; j <= imgWidthBound; j += strideCols) {
         size_t currLocalViewPixel = 0;

         // Loop over the pixels of one local view, for all input channels.
         for (int m = 0; m < nRowsInput; m++) {
            for (int k = i - halfFltHeight; k <= Int_t(i + halfFltHeightM1); k++) {
               int kstep = k * imgWidth;
               for (int l = j - halfFltWidth; l <= Int_t(j + halfFltWidthM1); l++) {

                  R__ASSERT(currLocalView * npixels + currLocalViewPixel < nSizeOutput);
                  // Store -1 for pixels in the zero-padding region, otherwise the
                  // column-major index of the corresponding input element.
                  if (k < 0 || k >= (Int_t)imgHeight || l < 0 || l >= (Int_t)imgWidth || kstep + l >= nColsInput)
                     V[currLocalViewPixel * nLocalViews + currLocalView] = -1;
                  else
                     V[currLocalViewPixel * nLocalViews + currLocalView] = (kstep + l) * nRowsInput + m;

                  currLocalViewPixel++;
               }
            }
         }
         currLocalView++;
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::Im2colFast(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, const std::vector<int> &V)
{
   size_t n = V.size();
   AFloat *a = A.GetRawDataPointer();
   const AFloat *b = B.GetRawDataPointer();
   // Multi-threaded variant: each worker gathers a chunk of nsteps elements.
   //    for (size_t j = 0; j < nsteps; ++j) {
   //       size_t ii = workerID + j;
   //       int idx = V[ii];
   //       if (idx >= 0) a[ii] = b[idx];
   //    }
   // Serial variant: gather, writing zero for padded positions (index -1).
   for (size_t ii = 0; ii < n; ++ii) {
      int idx = V[ii];
      if (idx >= 0) a[ii] = b[idx];
      else a[ii] = 0;
   }
}
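The index map produced by Im2colIndices follows the column-major storage of the local-view matrix: V[pixel * nLocalViews + view] holds the column-major index of the input element for that pixel of that view, or -1 inside the zero-padding. Im2colFast is then a pure gather over the raw buffers. A minimal sketch of that gather on plain vectors (hypothetical names, no TMVA types):

// Illustration only: the gather performed by Im2colFast.
// V[i] is a column-major index into the source buffer, or -1 for a padded pixel.
#include <vector>

void gatherLocalViews(std::vector<double> &dst,       // raw storage of the im2col matrix
                      const std::vector<double> &src, // raw storage of the input matrix
                      const std::vector<int> &V)      // precomputed index map
{
   for (size_t i = 0; i < V.size(); ++i)
      dst[i] = (V[i] >= 0) ? src[V[i]] : 0.0;
}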
template <typename AFloat>
void TCpu<AFloat>::RotateWeights(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B, size_t filterDepth,
                                 size_t filterHeight, size_t filterWidth, size_t numFilters)
{
   size_t jump = filterHeight * filterWidth;
   for (size_t j = 0; j < filterDepth; j++) {
      for (size_t k = 0; k < numFilters; k++) {
         for (size_t i = 0; i < jump; i++) {
            // Reverse each kernel (rotate by 180 degrees) and swap the filter and channel indices.
            A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
         }
      }
   }
}
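For intuition (a worked example with made-up numbers, not from the source): take numFilters = 1, filterDepth = 1 and a 2x2 filter stored as B(0, .) = [w0, w1, w2, w3]. With jump = 4 the loop writes A(0, i) = B(0, 3 - i), so A(0, .) = [w3, w2, w1, w0], i.e. the kernel rotated by 180 degrees. With more channels and filters the same per-kernel reversal is applied while the filter index k and the channel index j swap between row and column block, which is the weight layout needed for the backward ("full") convolution in CalculateConvActivationGradients below.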
template <typename AFloat>
// AddConvBiases: add the biases to the output of the convolutional layer (definition elided in this excerpt).
template <typename AFloat>
void TCpu<AFloat>::ConvLayerForward(std::vector<TCpuMatrix<AFloat>> &output,
                                    std::vector<TCpuMatrix<AFloat>> &derivatives,
                                    const std::vector<TCpuMatrix<AFloat>> &input,
                                    const TCpuMatrix<AFloat> &weights, const TCpuMatrix<AFloat> &biases,
                                    EActivationFunction activFunc, const std::vector<int> &vIndices,
                                    size_t nlocalViews, size_t nlocalViewPixels,
                                    AFloat dropoutProbability, bool applyDropout)
{
   // For every event in the batch (in the implementation this per-event work is
   // dispatched in parallel through the thread executor):
   for (size_t i = 0; i < input.size(); i++) {
      TCpuMatrix<AFloat> inputTr(nlocalViews, nlocalViewPixels);
      Im2colFast(inputTr, input[i], vIndices);         // im2col of the input image
      MultiplyTranspose(output[i], weights, inputTr);  // convolution as a matrix product
      AddConvBiases(output[i], biases);                // add the bias of each filter
      evaluateDerivative<TCpu<AFloat>>(derivatives[i], activFunc, output[i]);
      evaluate<TCpu<AFloat>>(output[i], activFunc);    // apply the activation function in place
   }
}
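Worked dimension check (hypothetical numbers): a 32x32 RGB input (depth 3) convolved with 6 filters of size 5x5, stride 1 and no padding gives height = width = 28, so nlocalViews = 28 * 28 = 784 and nlocalViewPixels = 3 * 5 * 5 = 75. Then inputTr is 784x75, weights is 6x75, and MultiplyTranspose produces output[i] = weights * inputTr^T of size 6x784, one feature map per row; AddConvBiases then adds the bias of filter f to every element of row f.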
template <typename AFloat>
void TCpu<AFloat>::ConvLayerBackward(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
                                     TCpuMatrix<AFloat> &weightGradients, TCpuMatrix<AFloat> &biasGradients,
                                     std::vector<TCpuMatrix<AFloat>> &df,
                                     const std::vector<TCpuMatrix<AFloat>> &activationGradients,
                                     const TCpuMatrix<AFloat> &weights,
                                     const std::vector<TCpuMatrix<AFloat>> &activationsBackward, size_t batchSize,
                                     size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width,
                                     size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
{
   // Multiply the activation-function derivatives with the incoming gradients, element-wise.
   for (size_t i = 0; i < batchSize; i++) {
      Hadamard(df[i], activationGradients[i]);
   }

   // Gradients w.r.t. the activations of the previous layer.
   CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth,
                                    depth, height, width, filterDepth, filterHeight, filterWidth);

   // Gradients w.r.t. the weights.
   CalculateConvWeightGradients(weightGradients, df, activationsBackward, batchSize, inputHeight, inputWidth, depth,
                                height, width, filterDepth, filterHeight, filterWidth, nLocalViews);

   // Gradients w.r.t. the biases.
   CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);
}
template <typename AFloat>
void TCpu<AFloat>::CalculateConvActivationGradients(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
                                                    const std::vector<TCpuMatrix<AFloat>> &df,
                                                    const TCpuMatrix<AFloat> &weights, size_t batchSize,
                                                    size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
                                                    size_t width, size_t filterDepth, size_t filterHeight,
                                                    size_t filterWidth)
{
   if (activationGradientsBackward.size() == 0) return;

   // Transform the weights: rotate each kernel by 180 degrees and swap the filter / channel axes.
   TCpuMatrix<AFloat> rotWeights(filterDepth, depth * filterHeight * filterWidth);
   RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());

   // Zero padding for the "full" convolution that recovers the input dimensions.
   size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
   size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));

   // Convolution dimensions for the backward pass.
   size_t tempNLocalViews = inputHeight * inputWidth;
   size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
   size_t tempStrideRows = 1;
   size_t tempStrideCols = 1;

   // im2col index map, computed once for the whole batch.
   std::vector<int> vIndices(tempNLocalViews * tempNLocalViewPixels);
   Im2colIndices(vIndices, df[0], tempNLocalViews, height, width, filterHeight, filterWidth, tempStrideRows,
                 tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);

   R__ASSERT(batchSize == activationGradientsBackward.size());
   // For every event in the batch: full convolution of df with the rotated weights.
   for (size_t i = 0; i < batchSize; i++) {
      TCpuMatrix<AFloat> dfTr(tempNLocalViews, tempNLocalViewPixels);
      Im2colFast(dfTr, df[i], vIndices);
      MultiplyTranspose(activationGradientsBackward[i], rotWeights, dfTr);
   }
}
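Worked example for the padding choice (hypothetical numbers): a forward convolution with stride 1, inputHeight = 32 and filterHeight = 5 gives height = 28, so tempZeroPaddingHeight = (32 - 28 + 5 - 1) / 2 = 4. A stride-1 convolution of the 28-row gradient map with a 5-row filter and padding 4 then yields 28 + 2 * 4 - 5 + 1 = 32 rows, i.e. the backward ("full") convolution recovers exactly the spatial size of the layer input, which is why tempNLocalViews = inputHeight * inputWidth.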
template <typename AFloat>
void TCpu<AFloat>::CalculateConvWeightGradients(TCpuMatrix<AFloat> &weightGradients,
                                                const std::vector<TCpuMatrix<AFloat>> &df,
                                                const std::vector<TCpuMatrix<AFloat>> &activationsBackward,
                                                size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth,
                                                size_t height, size_t width, size_t filterDepth, size_t filterHeight,
                                                size_t filterWidth, size_t nLocalViews)
{
   // Reset the gradients to zero before accumulation.
   weightGradients.Zero();

   const size_t filterSize = filterHeight * filterWidth;
   const size_t nLocalViewPixels = filterDepth * filterHeight * filterWidth;
   R__ASSERT(weightGradients.GetNcols() == filterDepth * filterHeight * filterWidth);

   const size_t tempStrideRows = 1;
   const size_t tempStrideCols = 1;

   // Zero padding such that the convolution dimensions match the forward activations.
   const size_t tempZeroPaddingHeight = (height - inputHeight + filterHeight - 1) / 2;
   const size_t tempZeroPaddingWidth = (width - inputWidth + filterWidth - 1) / 2;

   // im2col index map, computed once for the whole batch.
   std::vector<int> vIndices(nLocalViews * nLocalViewPixels);
   Im2colIndices(vIndices, activationsBackward[0], nLocalViews, inputHeight, inputWidth, filterHeight, filterWidth,
                 tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);

   // Per-event partial results.
   std::vector<TCpuMatrix<AFloat>> vres;
   for (size_t i = 0; i < batchSize; i++) {
      vres.emplace_back(depth, nLocalViewPixels);
   }

   // Work item for one event: im2col of the backward activations, then df[i] times that matrix.
   auto fmap = [&](int i) {
      TCpuMatrix<AFloat> xTr(nLocalViews, nLocalViewPixels);
      Im2colFast(xTr, activationsBackward[i], vIndices);
      Multiply(vres[i], df[i], xTr);
   };
   // In the implementation fmap is dispatched in parallel through the thread executor;
   // a serial equivalent simply calls it for every event:
   for (size_t i = 0; i < batchSize; i++) fmap(i);

   // Accumulate the per-event results into the weight gradients.
   for (size_t i = 0; i < batchSize; i++) {
      for (size_t j = 0; j < depth; j++) {
         for (size_t k = 0; k < filterDepth; k++) {
            size_t kOffset = k * filterSize;
            for (size_t l = 0; l < filterSize; l++) {
               weightGradients(j, kOffset + l) += vres[i](j, kOffset + l);
            }
         }
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::CalculateConvBiasGradients(TCpuMatrix<AFloat> &biasGradients,
                                              const std::vector<TCpuMatrix<AFloat>> &df, size_t batchSize,
                                              size_t depth, size_t nLocalViews)
{
   // The bias gradient of filter i is the sum of df over all events and all local views.
   for (size_t i = 0; i < depth; i++) {
      AFloat sum = 0;
      for (size_t j = 0; j < nLocalViews; j++) {
         for (size_t k = 0; k < batchSize; k++) {
            sum += df[k](i, j);
         }
      }
      biasGradients(i, 0) = sum;
   }
}
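In other words, biasGradients(i, 0) = sum over events k and views j of df_k(i, j): each filter has a single bias that is added at all nLocalViews positions in the forward pass, so its gradient is the sum of the corresponding feature-map gradients over the whole batch.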
template <typename AFloat>
void TCpu<AFloat>::Downsample(TCpuMatrix<AFloat> &A, TCpuMatrix<AFloat> &B, const TCpuMatrix<AFloat> &C,
                              size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
                              size_t strideCols)
{
   // Upper bounds (inclusive) for the centers of the pooling windows.
   int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
   int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
   size_t currLocalView = 0;

   // Loop over the centers of the pooling windows.
   for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
      for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
         // Loop over the channels (rows of C).
         for (int m = 0; m < (Int_t)C.GetNrows(); m++) {
            AFloat value = -std::numeric_limits<AFloat>::max();

            // Find the maximum within the pooling window and remember its index.
            for (int k = i - fltHeight / 2; k <= Int_t(i + (fltHeight - 1) / 2); k++) {
               for (int l = j - fltWidth / 2; l <= Int_t(j + (fltWidth - 1) / 2); l++) {
                  if (C(m, k * imgWidth + l) > value) {
                     value = C(m, k * imgWidth + l);
                     B(m, currLocalView) = k * imgWidth + l;
                  }
               }
            }
            A(m, currLocalView) = value;
         }
         currLocalView++;
      }
   }
}
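Worked example (hypothetical numbers): for a single channel with imgWidth = 4 and a 2x2 window covering rows {0, 1} and columns {2, 3}, the window touches the flattened columns {2, 3, 6, 7} of C. If C(0, 6) holds the largest of those four values, then A(0, currLocalView) = C(0, 6) and B(0, currLocalView) = 6, and MaxPoolLayerBackward below will add the incoming gradient of this view to column 6 of the previous layer's gradient matrix.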
template <typename AFloat>
void TCpu<AFloat>::MaxPoolLayerBackward(std::vector<TCpuMatrix<AFloat>> &activationGradientsBackward,
                                        const std::vector<TCpuMatrix<AFloat>> &activationGradients,
                                        const std::vector<TCpuMatrix<AFloat>> &indexMatrix, size_t batchSize,
                                        size_t depth, size_t nLocalViews)
{
   for (size_t i = 0; i < batchSize; i++) {
      for (size_t j = 0; j < depth; j++) {

         // Reset the gradients of the previous layer to zero.
         for (size_t t = 0; t < (size_t)activationGradientsBackward[i].GetNcols(); t++) {
            activationGradientsBackward[i](j, t) = 0;
         }

         // Route every incoming gradient to the pixel that won the max pooling.
         for (size_t k = 0; k < nLocalViews; k++) {
            AFloat grad = activationGradients[i](j, k);
            size_t winningIdx = indexMatrix[i](j, k);
            activationGradientsBackward[i](j, winningIdx) += grad;
         }
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::Reshape(TCpuMatrix<AFloat> &A, const TCpuMatrix<AFloat> &B)
{
   size_t nColsA = A.GetNcols();
   size_t nColsB = B.GetNcols();

   for (size_t i = 0; i < A.GetNrows(); i++) {
      for (size_t j = 0; j < A.GetNcols(); j++) {
         size_t nElem = i * nColsA + j;
         A(i, j) = B(nElem / nColsB, (nElem - 1) % nColsB);
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::Flatten(TCpuMatrix<AFloat> &A, const std::vector<TCpuMatrix<AFloat>> &B, size_t size, size_t nRows,
                           size_t nCols)
{
   for (size_t i = 0; i < (size_t)size; i++) {
      for (size_t j = 0; j < (size_t)nRows; j++) {
         for (size_t k = 0; k < (size_t)nCols; k++) {
            // Matrix B[i] is stretched into row i of A.
            A(i, j * nCols + k) = B[i](j, k);
         }
      }
   }
}
template <typename AFloat>
void TCpu<AFloat>::Deflatten(std::vector<TCpuMatrix<AFloat>> &A, const TCpuMatrix<AFloat> &B, size_t size,
                             size_t nRows, size_t nCols)
{
   for (size_t i = 0; i < (size_t)size; i++) {
      for (size_t j = 0; j < (size_t)nRows; j++) {
         for (size_t k = 0; k < (size_t)nCols; k++) {
            // Row i of B is unfolded back into the nRows x nCols matrix A[i].
            A[i](j, k) = B(i, j * nCols + k);
         }
      }
   }
}
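Flatten and Deflatten are inverse operations: Flatten stretches each nRows x nCols matrix B[i] into row i of A (row-major within the row), and Deflatten restores it. A minimal round-trip sketch on plain vectors (hypothetical names, no TMVA types):

// Illustration only: Flatten / Deflatten round trip on plain std::vector matrices.
#include <cassert>
#include <vector>
using Mat = std::vector<std::vector<double>>;

int main()
{
   size_t size = 2, nRows = 2, nCols = 3;
   std::vector<Mat> B = {{{1, 2, 3}, {4, 5, 6}}, {{7, 8, 9}, {10, 11, 12}}};

   // Flatten: A(i, j * nCols + k) = B[i](j, k)
   Mat A(size, std::vector<double>(nRows * nCols));
   for (size_t i = 0; i < size; ++i)
      for (size_t j = 0; j < nRows; ++j)
         for (size_t k = 0; k < nCols; ++k) A[i][j * nCols + k] = B[i][j][k];

   // Deflatten: C[i](j, k) = A(i, j * nCols + k)
   std::vector<Mat> C(size, Mat(nRows, std::vector<double>(nCols)));
   for (size_t i = 0; i < size; ++i)
      for (size_t j = 0; j < nRows; ++j)
         for (size_t k = 0; k < nCols; ++k) C[i][j][k] = A[i][j * nCols + k];

   assert(C == B); // round trip restores the original tensor
   return 0;
}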
template <typename AReal>
void TCpu<AReal>::Rearrange(std::vector<TCpuMatrix<AReal>> &out, const std::vector<TCpuMatrix<AReal>> &in)
{
   // Rearrange in (T x B x D) into out (B x T x D).
   size_t B = out.size();
   size_t T = out[0].GetNrows();
   size_t D = out[0].GetNcols();
   if ((T != in.size()) || (B != in[0].GetNrows()) || (D != in[0].GetNcols())) {
      std::cout << "Incompatible Dimensions\n"
                << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
                << D << "\n";
      return;
   }
   for (size_t i = 0; i < B; ++i) {
      for (size_t j = 0; j < T; ++j) {
         for (size_t k = 0; k < D; ++k) {
            out[i](j, k) = in[j](i, k);
         }
      }
   }
}
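Rearrange swaps the batch and time axes: with B = 2 events, T = 3 time steps and D = 1 feature (made-up numbers), out[event](t, d) = in[t](event, d), e.g. out[0](2, 0) is read from in[2](0, 0). The check at the top rejects any input whose T x B x D shape does not match the output's B x T x D shape.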
static void CalculateConvActivationGradients(std::vector< TCpuMatrix< Scalar_t >> &activationGradientsBackward, const std::vector< TCpuMatrix< Scalar_t >> &df, const TCpuMatrix< Scalar_t > &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
void Foreach(F func, unsigned nTimes)
Execute func (with no arguments) nTimes in parallel.
static void Im2col(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B into local-view format, suitable for convolution, and store it in matrix A...
static void Rearrange(std::vector< TCpuMatrix< AReal >> &out, const std::vector< TCpuMatrix< AReal >> &in)
Rearrange data according to time: fill the B x T x D output with the T x B x D input matrices.
static void MultiplyTranspose(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &input, const TCpuMatrix< Scalar_t > &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void RotateWeights(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A...
static void Im2colIndices(std::vector< int > &V, const TCpuMatrix< AReal > &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
void Ger(const int *m, const int *n, const Real_t *alpha, const Real_t *x, const int *incx, const Real_t *y, const int *incy, Real_t *A, const int *lda)
Add the outer product of x and y to the matrix A.
static void AddConvBiases(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the biases in the Convolutional Layer.
static void Im2colFast(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B, const std::vector< int > &V)
static void InitializeOneVector(size_t n)
static size_t GetNWorkItems(size_t nelements)
static void CalculateConvBiasGradients(TCpuMatrix< Scalar_t > &biasGradients, const std::vector< TCpuMatrix< Scalar_t >> &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void AddRowWise(TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
size_t GetNElements() const
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const Real_t *alpha, const Real_t *A, const int *lda, const Real_t *B, const int *ldb, const Real_t *beta, Real_t *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
static void Backward(TCpuMatrix< Scalar_t > &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, TCpuMatrix< Scalar_t > &df, const TCpuMatrix< Scalar_t > &activationGradients, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &activationBackward)
Perform the complete backward propagation step.
static void ConvLayerBackward(std::vector< TCpuMatrix< Scalar_t >> &activationGradientsBackward, TCpuMatrix< Scalar_t > &weightGradients, TCpuMatrix< Scalar_t > &biasGradients, std::vector< TCpuMatrix< Scalar_t >> &df, const std::vector< TCpuMatrix< Scalar_t >> &activationGradients, const TCpuMatrix< Scalar_t > &weights, const std::vector< TCpuMatrix< Scalar_t >> &activationBackward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static void CalculateConvWeightGradients(TCpuMatrix< Scalar_t > &weightGradients, const std::vector< TCpuMatrix< Scalar_t >> &df, const std::vector< TCpuMatrix< Scalar_t >> &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer. ...
void Error(const char *location, const char *msgfmt,...)
static void MaxPoolLayerBackward(std::vector< TCpuMatrix< AReal >> &activationGradientsBackward, const std::vector< TCpuMatrix< AReal >> &activationGradients, const std::vector< TCpuMatrix< AReal >> &indexMatrix, size_t batchSize, size_t depth, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static ROOT::TThreadExecutor & GetThreadExecutor()
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
static void Reshape(TCpuMatrix< AReal > &A, const TCpuMatrix< AReal > &B)
Transform the matrix B into the matrix A, which has different dimensions.
static void ConvLayerForward(std::vector< TCpuMatrix< Scalar_t >> &output, std::vector< TCpuMatrix< Scalar_t >> &derivatives, const std::vector< TCpuMatrix< Scalar_t >> &input, const TCpuMatrix< Scalar_t > &weights, const TCpuMatrix< Scalar_t > &biases, EActivationFunction func, const std::vector< int > &vIndices, size_t nlocalViews, size_t nlocalViewPixels, Scalar_t dropoutProbability, bool applyDropout)
Forward propagation in the Convolutional layer.
static const AFloat * GetOnePointer()
Returns pointer to a vector holding only ones with a guaranteed length of the number of columns of ev...
void Zero()
Clear content of the matrix and initialize to zero elements.
static void Downsample(TCpuMatrix< AReal > &A, TCpuMatrix< AReal > &B, const TCpuMatrix< AReal > &C, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are stored in matrix B.
static void Flatten(TCpuMatrix< AReal > &A, const std::vector< TCpuMatrix< AReal >> &B, size_t size, size_t nRows, size_t nCols)
Flattens the tensor B, such that each matrix is stretched into one row, resulting in a matrix A...
EActivationFunction
Enum that represents layer activation functions.
static void Deflatten(std::vector< TCpuMatrix< AReal >> &A, const TCpuMatrix< AReal > &B, size_t index, size_t nRows, size_t nCols)
Transforms each row of B to a matrix and stores it in the tensor A.