32template <
typename AFloat>
39 int n = (
int)Weights.GetNrows();
41 if ((
int)
output.GetNrows() !=
m) {
42 Error(
"MultiplyTranspose",
"Invalid input - output rows - input: %d != output : %d",
m, (
int)
output.GetNrows());
45 if ((
int)
output.GetNcols() !=
n) {
46 Error(
"MultiplyTranspose",
"Invalid output cols or weight rows - output cols: %d != weight rows : %d",(
int)
output.GetNcols(),
n);
49 if ((
int)Weights.GetNcols() != k) {
50 Error(
"MultiplyTranspose",
"Invalid input cols or weight cols - input cols: %d != weight cols : %d", k, (
int) Weights.GetNcols());
62 const AFloat *A =
input.GetRawDataPointer();
63 const AFloat *B = Weights.GetRawDataPointer();
64 AFloat *
C =
output.GetRawDataPointer();
66 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &
m, &
n, &k, &alpha, A, &
m, B, &
n, &beta, C, &
m);
74template <
typename AFloat>
84 AFloat *A =
output.GetRawDataPointer();
86 const AFloat *
y = biases.GetRawDataPointer();
89 R__ASSERT(
n <= (
int)(biases.GetNcols()*biases.GetNrows()));
99template <
typename AFloat>
111 if (activationGradientsBackward.
GetSize() > 0 ) {
113 Matrix_t activationGradientsBackward_m = activationGradientsBackward.
GetMatrix();
115 Multiply(activationGradientsBackward_m, df_m, weights);
119 if (weightGradients.
GetNoElements() > 0) TransposeMultiply(weightGradients, df_m, activationsBackward.
GetMatrix());
125 if (biasGradients.
GetNoElements() > 0) SumColumns(biasGradients, df_m);
131template <
typename AFloat>
133 size_t fltHeight,
size_t fltWidth,
size_t strideRows,
size_t strideCols,
134 size_t zeroPaddingHeight,
size_t zeroPaddingWidth)
138 int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
139 int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
140 size_t currLocalView = 0;
142 const int halfFltHeight = fltHeight / 2;
143 const int halfFltWidth = fltWidth / 2;
144 const int halfFltHeightM1 = (fltHeight - 1) / 2;
145 const int halfFltWidthM1 = (fltWidth - 1) / 2;
146 const int nRowsInput = B.
GetNrows();
147 const int nColsInput = B.
GetNcols();
148 const int nRowsOutput = A.
GetNrows();
149 const int nColsOutput = A.
GetNcols();
152 for (
int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
153 for (
int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {
154 size_t currLocalViewPixel = 0;
157 R__ASSERT((
int) currLocalView < nRowsOutput );
159 for (
int m = 0;
m < nRowsInput;
m++) {
160 for (
int k = i - halfFltHeight ; k <=
Int_t(i + halfFltHeightM1 ); k++) {
161 int kstep = k * imgWidth;
162 for (
int l = j - halfFltWidth ;
l <=
Int_t(j + halfFltWidthM1);
l++) {
165 R__ASSERT((
int) currLocalViewPixel < nColsOutput );
167 if (k < 0 || k >= (
Int_t)imgHeight || l < 0 || l >= (
Int_t)imgWidth || kstep +
l >= nColsInput)
168 A(currLocalView, currLocalViewPixel++) = 0;
170 A(currLocalView, currLocalViewPixel++) = B(
m, kstep +
l);
182template <
typename AFloat>
184 size_t fltHeight,
size_t fltWidth,
size_t strideRows,
size_t strideCols,
185 size_t zeroPaddingHeight,
size_t zeroPaddingWidth)
189 int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
190 int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
191 size_t currLocalView = 0;
193 const int halfFltHeight = fltHeight / 2;
194 const int halfFltWidth = fltWidth / 2;
195 const int halfFltHeightM1 = (fltHeight - 1) / 2;
196 const int halfFltWidthM1 = (fltWidth - 1) / 2;
197 const int nRowsInput = B.
GetNrows();
198 const int nColsInput = B.
GetNcols();
199 const size_t nSizeOutput = V.size();
200 const int npixels = nRowsInput * fltHeight * fltWidth;
205 for (
int i = halfFltHeight -zeroPaddingHeight; i <= imgHeightBound; i += strideRows) {
206 for (
int j = halfFltWidth -zeroPaddingWidth ; j <= imgWidthBound; j += strideCols) {
207 size_t currLocalViewPixel = 0;
212 for (
int m = 0;
m < nRowsInput;
m++) {
213 for (
int k = i - halfFltHeight ; k <=
Int_t(i + halfFltHeightM1 ); k++) {
214 int kstep = k * imgWidth;
215 for (
int l = j - halfFltWidth ;
l <=
Int_t(j + halfFltWidthM1);
l++) {
219 R__ASSERT(currLocalView * npixels + currLocalViewPixel < nSizeOutput );
220 if (k < 0 || k >= (
Int_t)imgHeight || l < 0 || l >= (
Int_t)imgWidth || kstep +
l >= nColsInput)
222 V[currLocalViewPixel * nLocalViews + currLocalView] = -1;
224 V[currLocalViewPixel * nLocalViews + currLocalView]= ( kstep +
l) * nRowsInput +
m;
226 currLocalViewPixel++;
234template <
typename AFloat>
249 for (
size_t j = 0; j < nsteps; ++j) {
250 size_t ii = workerID+j;
253 if (idx >= 0)
a[ii] =
b[idx];
263 for (
size_t ii = 0; ii <
n; ++ii) {
265 if (idx >= 0)
a[ii] =
b[idx];
272template <
typename AFloat>
274 size_t filterHeight,
size_t filterWidth,
size_t numFilters)
276 size_t jump = filterHeight * filterWidth;
277 for (
size_t j = 0; j < filterDepth; j++) {
278 for (
size_t k = 0; k < numFilters; k++) {
279 for (
size_t i = 0; i < jump; i++) {
280 A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
288template <
typename AFloat>
298 AFloat *A =
output.GetRawDataPointer();
313template<
typename AFloat>
316 size_t temp = imgDim - fltDim + 2 * padding;
317 if (temp % stride || temp + stride <= 0) {
318 Fatal(
"calculateDimension",
"Not compatible hyper parameters for layer - (imageDim, filterDim, padding, stride) "
319 "%zu, %zu, %zu, %zu", imgDim, fltDim, padding, stride);
321 return temp / stride + 1;
325template <
typename AFloat>
341 std::vector<int> forwardIndices(nLocalViews * nLocalViewPixels);
360 Im2colFast(inputTr,
input.At(i).GetMatrix(), forwardIndices);
363 MultiplyTranspose(output_m, weights, inputTr);
364 AddConvBiases(output_m, biases);
372 Copy(inputActivationFunc,
output);
379template <
typename AFloat>
390 size_t batchSize,
size_t inputHeight,
391 size_t inputWidth,
size_t depth,
393 size_t filterDepth,
size_t filterHeight,
394 size_t filterWidth,
size_t nLocalViews)
405 ActivationFunctionBackward(df, outputTensor, activationGradients, inputActivationFunc,
412 CalculateConvActivationGradients(activationGradientsBackward, df, weights, batchSize, inputHeight, inputWidth, depth,
413 height,
width, filterDepth, filterHeight, filterWidth);
416 CalculateConvWeightGradients(weightGradients, df, activationsBackward, batchSize, inputHeight, inputWidth, depth,
417 height,
width, filterDepth, filterHeight, filterWidth, nLocalViews);
420 CalculateConvBiasGradients(biasGradients, df, batchSize, depth, nLocalViews);
424template <
typename AFloat>
428 size_t inputHeight,
size_t inputWidth,
size_t depth,
size_t height,
429 size_t width,
size_t filterDepth,
size_t filterHeight,
432 if (activationGradientsBackward.
GetSize() == 0)
return;
435 activationGradientsBackward.
Zero();
443 RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.
GetNrows());
447 size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight -
height + filterHeight - 1) / 2));
448 size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth -
width + filterWidth - 1) / 2));
454 size_t tempNLocalViews = inputHeight * inputWidth;
455 size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
457 size_t tempStrideRows = 1;
458 size_t tempStrideCols = 1;
462 std::vector<int> vIndices( tempNLocalViews * tempNLocalViewPixels );
463 Im2colIndices(vIndices, df.
At(0).GetMatrix(), tempNLocalViews,
height,
width, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
464 tempZeroPaddingHeight, tempZeroPaddingWidth);
477 Im2colFast(dfTr, df.
At(i).GetMatrix(), vIndices);
482 Matrix_t agb_m = activationGradientsBackward.
At(i).GetMatrix();
483 MultiplyTranspose(agb_m, rotWeights, dfTr);
493template <
typename AFloat>
497 size_t batchSize,
size_t inputHeight,
size_t inputWidth,
size_t depth,
498 size_t height,
size_t width,
size_t filterDepth,
size_t filterHeight,
499 size_t filterWidth,
size_t nLocalViews)
502 weightGradients.
Zero();
504 const size_t filterSize = filterHeight * filterWidth;
505 const size_t nLocalViewPixels = filterDepth * filterHeight * filterWidth;
506 R__ASSERT( weightGradients.
GetNcols() == filterDepth * filterHeight * filterWidth);
508 const size_t tempStrideRows = 1;
509 const size_t tempStrideCols = 1;
512 const size_t tempZeroPaddingHeight = (
height - inputHeight + filterHeight - 1) / 2;
513 const size_t tempZeroPaddingWidth = (
width - inputWidth + filterWidth - 1) / 2;
520 std::vector<int> vIndices(nLocalViews * nLocalViewPixels );
521 Im2colIndices(vIndices, activationsBackward.
At(0).GetMatrix(), nLocalViews, inputHeight, inputWidth, filterHeight , filterWidth,
522 tempStrideRows, tempStrideCols, tempZeroPaddingHeight, tempZeroPaddingWidth);
536 auto fmap = [&](
int i) {
548 Im2colFast(xTr, activationsBackward.
At(i).GetMatrix(), vIndices);
554 Multiply( mres, df.
At(i).GetMatrix(), xTr);
565 for (
size_t i = 0; i < batchSize; i++) {
568 for (
size_t j = 0; j < depth; j++) {
569 for (
size_t k = 0; k < filterDepth; k++) {
570 size_t kOffset = k * filterSize;
571 for (
size_t l = 0;
l < filterSize;
l++) {
573 weightGradients(j, kOffset +
l) += vres_m(j, kOffset +
l);
586template <
typename AFloat>
588 size_t batchSize,
size_t depth,
size_t nLocalViews)
590 biasGradients.
Zero();
591 for (
size_t i = 0; i < depth; i++) {
593 for (
size_t j = 0; j < nLocalViews; j++) {
594 for (
size_t k = 0; k < batchSize; k++) {
599 biasGradients(i, 0) =
sum;
604template <
typename AFloat>
608 size_t imgHeight,
size_t imgWidth,
size_t fltHeight,
size_t fltWidth,
size_t strideRows,
614 for (
size_t ifirst = 0; ifirst < tC.
GetFirstSize(); ++ifirst) {
621 int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
622 int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
623 size_t currLocalView = 0;
626 for (
int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
627 for (
int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
629 for (
int m = 0;
m < (
Int_t)C.GetNrows();
m++) {
630 AFloat
value = -std::numeric_limits<AFloat>::max();
632 for (
int k = i - fltHeight / 2; k <=
Int_t(i + (fltHeight - 1) / 2); k++) {
633 for (
int l = j - fltWidth / 2;
l <=
Int_t(j + (fltWidth - 1) / 2);
l++) {
634 if (C(
m, k * imgWidth +
l) >
value) {
635 value = C(
m, k * imgWidth +
l);
636 B(
m, currLocalView) = k * imgWidth +
l;
640 A(
m, currLocalView) =
value;
649template <
typename AFloat>
669 Matrix_t activationGradientsBackward_m = activationGradientsBackward.
At(
l).GetMatrix();
670 Matrix_t activationGradients_m = activationGradients.
At(
l).GetMatrix();
671 Matrix_t indexMatrix_m = indexMatrix.
At(
l).GetMatrix();
673 size_t depth = activationGradientsBackward_m.
GetNrows();
675 for (
size_t j = 0; j < depth; j++) {
677 for (
size_t t = 0; t < (size_t)activationGradientsBackward_m.
GetNcols(); t++) {
678 activationGradientsBackward_m(j, t) = 0;
682 for (
size_t k = 0; k < nLocalViews; k++) {
683 AFloat grad = activationGradients_m(j, k);
684 size_t winningIdx = indexMatrix_m(j, k);
685 activationGradientsBackward_m(j, winningIdx) += grad;
692template <
typename AFloat>
703 return x.
Reshape( {
x.GetShape().front(),
x.GetSize()/
x.GetShape().front()});
708template <
typename AFloat>
730 assert (
input.GetShape().size() == 2);
731 size_t n =
input.GetShape()[0];
732 size_t d =
input.GetShape()[1];
739 auto f = [&] (
size_t k)
747 for (
size_t i = 0; i <
n; i++) {
748 AFloat xi = inputK[i];
754 for (
size_t i = 0; i <
n; i++) {
755 AFloat xi = inputK[i];
756 double xmu = xi - meanK;
757 sq = sq + (xmu * xmu);
758 outputK[i] = AFloat(xmu);
761 variance(0,k) = sq /
n;
762 iVariance(0,k) = 1. / std::sqrt(variance(0,k) + epsilon);
764 double iVK = iVariance(0, k);
765 double gK = gamma(0, k);
766 double bK = beta(0, k);
767 for (
size_t i = 0; i <
n; i++) {
768 AFloat yi = outputK[i] ;
769 outputK[i] = AFloat( gK * iVK * yi + bK );
775 if (nTrainedBatches == 0) {
776 runningMeans(0,k) = mean(0,k);
777 runningVars(0,k) = variance(0,k) * (
n) / (
Scalar_t(
n - 1) + epsilon);
779 double decay = momentum;
780 if (momentum < 0) decay = nTrainedBatches/
Scalar_t(nTrainedBatches+1);
781 runningMeans(0,k) = decay * runningMeans(0,k) + (1. - decay) * mean(0,k);
782 runningVars(0,k) = decay * runningVars(0,k) + (1.-decay) * variance(0,k) * (
n) / (
Scalar_t(
n - 1) + epsilon);
793template <
typename AFloat>
806 assert (
input.GetShape().size() == 2);
807 size_t n =
input.GetShape()[0];
808 size_t d =
input.GetShape()[1];
813 auto f = [&] (
size_t k) {
818 double gK = gamma(0, k);
819 double bK = beta(0, k);
820 double mK = runningMeans(0, k);
821 double vK = 1. / (sqrt(runningVars(0, k) + epsilon));
824 for (
size_t i = 0; i <
n; i++) {
825 AFloat xi = inputK[i];
826 outputK[i] = AFloat( gK * (xi - mK) * vK + bK );
834template <
typename AFloat>
850 assert (outputGrad.
GetShape().size() == 2);
860 auto f = [&] (
size_t k) {
866 auto meanK = mean(0, k);
867 for (
size_t i = 0; i <
n; i++) {
868 AFloat xi = inputK[i];
869 double xhat = xi - meanK;
870 dbeta(0, k) += outputGradK[i];
871 dgamma(0, k) += outputGradK[i] * xhat;
873 double npSumDy = dbeta(0, k);
874 double npSumDyHMu = dgamma(0, k);
875 dgamma(0, k) *= iVariance(0, k);
878 double bterm = npSumDyHMu / (variance(0, k) + epsilon);
879 double aterm = (1. /
double(
n) * gamma(0, k) * iVariance(0, k));
880 for (
size_t i = 0; i <
n; i++) {
881 AFloat xi = inputK[i];
882 AFloat dyi = outputGradK[i];
883 double xmu = xi - meanK;
884 inputGradK[i] = AFloat( aterm * (
n * dyi - npSumDy - xmu * bterm) );
892template <
typename AFloat>
898 for (
size_t i = 0; i < A.
GetNrows(); i++) {
899 for (
size_t j = 0; j < A.
GetNcols(); j++) {
900 size_t nElem = i * nColsA + j;
901 A(i, j) = B(nElem / nColsB, nElem % nColsB);
907template <
typename AFloat>
924 assert ( A.
GetWSize() == nRows*nCols);
926 for (
size_t i = 0; i < bsize; i++) {
927 for (
size_t j = 0; j < nRows; j++) {
928 for (
size_t k = 0; k < nCols; k++) {
929 A( 0, i, j * nCols + k) = B(i, j, k);
943template <
typename AFloat>
956 assert ( B.
GetWSize() == nRows*nCols);
957 for (
size_t i = 0; i < (size_t)
size; i++) {
958 for (
size_t j = 0; j < (size_t)nRows; j++) {
959 for (
size_t k = 0; k < (size_t)nCols; k++) {
960 A(i, j, k) = B(0, i, j * nCols + k);
967template <
typename AFloat>
971 assert ( out.GetShape().size() == 3 && in.
GetShape().size() == 3);
974 size_t B = out.GetFirstSize();
975 size_t T = out.GetCSize();
976 size_t D = out.GetWSize();
978 std::cout <<
"Incompatible Dimensions\n"
984 for (
size_t i = 0; i < B; ++i) {
985 for (
size_t j = 0; j < T; ++j) {
986 for (
size_t k = 0; k < D; ++k) {
987 out( i, j, k ) = in( j, i, k);
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t height
A pseudo container class which is a generator of indices.
TCpuBuffer GetSubBuffer(size_t offset, size_t start) const
Return sub-buffer of size start starting at element offset.
static size_t GetOnePointerSize()
void Zero()
Clear content of the matrix and initialize to zero elements.
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
static const AFloat * GetOnePointer()
static size_t GetNWorkItems(size_t nelements)
static void InitializeOneVector(size_t n)
static Executor & GetThreadExecutor()
size_t GetNoElements() const
const TCpuBuffer< AFloat > & GetDeviceBuffer() const
TCpuMatrix< AFloat > GetMatrix() const
size_t GetFirstSize() const
TCpuTensor< AFloat > At(size_t i)
TCpuTensor< AFloat > Reshape(Shape_t shape) const
typename TMVA::Experimental::RTensor< AFloat >::Shape_t Shape_t
static void CalculateConvBiasGradients(Matrix_t &biasGradients, const Tensor_t &df, size_t batchSize, size_t depth, size_t nLocalViews)
Utility function for calculating the bias gradients of the convolutional layer.
static void Deflatten(Tensor_t &A, const Tensor_t &B)
Transforms each row of B to a matrix and stores it in the tensor A.
static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, const Tensor_t &activationGradients, const Tensor_t &indexMatrix, const Tensor_t &, const Tensor_t &, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t nLocalViews)
Perform the complete backward propagation step in a Pooling Layer.
static void ConvLayerForward(Tensor_t &output, Tensor_t &inputActivationFunc, const Tensor_t &input, const Matrix_t &weights, const Matrix_t &biases, const DNN::CNN::TConvParams ¶ms, EActivationFunction activFunc, Tensor_t &, const ConvDescriptors_t &, ConvWorkspace_t &)
Forward propagation in the Convolutional layer.
static void Im2colFast(Matrix_t &A, const Matrix_t &B, const std::vector< int > &V)
static void AddRowWise(Matrix_t &output, const Matrix_t &biases)
Add the vectors biases row-wise to the matrix output.
static void CalculateConvActivationGradients(Tensor_t &activationGradientsBackward, const Tensor_t &df, const Matrix_t &weights, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth)
Utility function for calculating the activation gradients of the layer before the convolutional layer...
static void BatchNormLayerForwardTraining(int axis, const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta, Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans, Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum, Scalar_t epsilon, const TensorDescriptor_t &bnParDescriptor)
The input from each batch are normalized during training to have zero mean and unit variance and they...
static void Backward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, const Tensor_t &df, const Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward)
Perform the complete backward propagation step.
static void Reshape(Matrix_t &A, const Matrix_t &B)
Transform the matrix B to a matrix with different dimensions A.
static void Rearrange(Tensor_t &out, const Tensor_t &in)
Rearrange data according to time: fill B x T x D out with T x B x D matrix in.
static void MultiplyTranspose(Matrix_t &output, const Matrix_t &input, const Matrix_t &weights)
Matrix-multiply input with the transpose of weights and write the results into output.
static void CalculateConvWeightGradients(Matrix_t &weightGradients, const Tensor_t &df, const Tensor_t &activations_backward, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Utility function for calculating the weight gradients of the convolutional layer.
static size_t calculateDimension(size_t imgDim, size_t fltDim, size_t padding, size_t stride)
Calculate how many neurons "fit" in the output layer, given the input as well as the layer's hyperpar...
static void BatchNormLayerBackward(int axis, const Tensor_t &x, const Tensor_t &dy, Tensor_t &dx, Matrix_t &gamma, Matrix_t &dgamma, Matrix_t &dbeta, const Matrix_t &mean, const Matrix_t &variance, const Matrix_t &iVariance, Scalar_t epsilon, const TensorDescriptor_t &)
static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients, Matrix_t &biasGradients, Tensor_t &df, Tensor_t &activationGradients, const Matrix_t &weights, const Tensor_t &activationBackward, const Tensor_t &outputTensor, EActivationFunction activFunc, const ConvDescriptors_t &, ConvWorkspace_t &, size_t batchSize, size_t inputHeight, size_t inputWidth, size_t depth, size_t height, size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t nLocalViews)
Perform the complete backward propagation step in a Convolutional Layer.
static Tensor_t BatchNormLayerReshapeTensor(int axis, const Tensor_t &x)
static void Im2colIndices(std::vector< int > &V, const Matrix_t &B, size_t nLocalViews, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
static void Flatten(Tensor_t &A, const Tensor_t &B)
Flattens the tensor B such that each matrix is stretched into one row, resulting in a matrix A.
static void AddConvBiases(Matrix_t &output, const Matrix_t &biases)
Add the biases in the Convolutional Layer.
static void Im2col(Matrix_t &A, const Matrix_t &B, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols, size_t zeroPaddingHeight, size_t zeroPaddingWidth)
Transform the matrix B in local view format, suitable for convolution, and store it in matrix A.
static void BatchNormLayerForwardInference(int axis, const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta, Tensor_t &y, const Matrix_t &runningMeans, const Matrix_t &runningVars, Scalar_t epsilon, const TensorDescriptor_t &)
During inference the inputs are not normalized using the batch mean but the previously computed at ru...
static void Downsample(Tensor_t &A, Tensor_t &B, const Tensor_t &C, const PoolingDescriptors_t &, PoolingWorkspace_t &, size_t imgHeight, size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols)
Downsample the matrix C to the matrix A, using max operation, such that the winning indices are store...
TCpuMatrix< AReal > Matrix_t
static void RotateWeights(Matrix_t &A, const Matrix_t &B, size_t filterDepth, size_t filterHeight, size_t filterWidth, size_t numFilters)
Rotates the matrix B, which represents the weights, and stores the result in the matrix A.
static void AddRowWise(TMatrixT< Scalar_t > &output, const TMatrixT< Scalar_t > &biases)
Add the vectors biases row-wise to the matrix output.
static void AddConvBiases(TMatrixT< AReal > &output, const TMatrixT< AReal > &biases)
Add the biases in the Convolutional Layer.
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
std::size_t GetSize() const
const Shape_t & GetShape() const
double beta(double x, double y)
Calculates the beta function.
void Gemm(const char *transa, const char *transb, const int *m, const int *n, const int *k, const AReal *alpha, const AReal *A, const int *lda, const AReal *B, const int *ldb, const AReal *beta, AReal *C, const int *ldc)
Multiply the matrix A with the matrix B and store the result in C.
void Ger(const int *m, const int *n, const AReal *alpha, const AReal *x, const int *incx, const AReal *y, const int *incy, AReal *A, const int *lda)
Add the outer product of x and y to the matrix A.
EActivationFunction
Enum that represents layer activation functions.
create variable transformations
constexpr Double_t C()
Velocity of light in $m\,s^{-1}$.
size_t strideRows
The number of row pixels to slide the filter each step.
size_t filterHeight
The height of the filter.
size_t inputHeight
The height of the previous layer or input.
size_t paddingWidth
The number of zero layers left and right of the input.
size_t filterWidth
The width of the filter.
size_t paddingHeight
The number of zero layers added top and bottom of the input.
size_t inputWidth
The width of the previous layer or input.
size_t inputDepth
The depth of the previous layer or input.
size_t strideCols
The number of column pixels to slide the filter each step.
static uint64_t sum(uint64_t i)