36 float alpha = 1.0, beta = 0.0;
38 cudaStream_t s = A.GetComputeStream();
45 A.GetDataPointer(),
m,
46 B.GetDataPointer(), k,
48 C.GetDataPointer(),
m);
50 C.SetComputeStream(s);
63 double alpha = 1.0, beta = 0.0;
65 cudaStream_t s = A.GetComputeStream();
72 A.GetDataPointer(),
m,
73 B.GetDataPointer(), k,
75 C.GetDataPointer(),
m);
77 C.SetComputeStream(s);
85 float alpha,
float beta)
93 cudaStream_t s = A.GetComputeStream();
100 A.GetDataPointer(), k,
101 B.GetDataPointer(), k,
103 C.GetDataPointer(),
m);
105 C.SetComputeStream(s);
112 double alpha,
double beta)
120 cudaStream_t s = A.GetComputeStream();
127 A.GetDataPointer(), k,
128 B.GetDataPointer(), k,
130 C.GetDataPointer(),
m);
132 C.SetComputeStream(s);
136template<
typename AFloat>
142 cudaStream_t s = A.GetComputeStream();
143 ::TMVA::DNN::Cuda::Hadamard<<<gridDims, blockDims, 0, s>>>(B.GetDataPointer(),
147 B.SetComputeStream(s);
150template<
typename AFloat>
155 int ncols = A.GetFirstSize();
156 int nrows = A.GetFirstStride();
158 ncols = A.GetWSize();
159 nrows = A.GetHSize();
163 cudaStream_t s = A.GetComputeStream();
164 ::TMVA::DNN::Cuda::Hadamard<<<gridDims, blockDims, 0, s>>>(B.GetDataPointer(),
167 B.SetComputeStream(s);
171template<
typename AFloat>
176 cudaStream_t s = A.GetComputeStream();
179 ::TMVA::DNN::Cuda::ReduceMatrix<<<gridDims, blockDims, 0, s>>>(
191 float alpha,
float beta)
198 cudaStream_t s = A.GetComputeStream();
204 A.GetDataPointer(),
m,
206 & beta, B.GetDataPointer(), 1);
208 B.SetComputeStream(s);
215 double alpha,
double beta)
222 cudaStream_t s = A.GetComputeStream();
228 A.GetDataPointer(),
m,
230 & beta, B.GetDataPointer(), 1);
232 B.SetComputeStream(s);
242 float alpha = 1.0, beta = 0.0;
244 cudaStream_t s = A.GetComputeStream();
250 A.GetDataPointer(),
m,
252 & beta, B.GetDataPointer(), 1);
254 B.SetComputeStream(s);
265 double alpha = 1.0, beta = 0.0;
267 cudaStream_t s = A.GetComputeStream();
273 A.GetDataPointer(),
m,
275 & beta, B.GetDataPointer(), 1);
277 B.SetComputeStream(s);
290template<
typename AFloat>
293 if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
294 Fatal(
"AlmostEquals",
"The passed matrices have unequal shapes.");
299 cudaStream_t s = A.GetComputeStream();
305 ::TMVA::DNN::Cuda::AlmostEquals<<<gridDims, blockDims, 0, s>>>(
dResult, A.GetDataPointer(), B.GetDataPointer(),
306 epsilon, A.GetNrows(), A.GetNcols());
323 cublasSaxpy(A.GetCublasHandle(), A.GetNoElements(), &alpha,
324 A.GetDataPointer(), 1,
325 B.GetDataPointer(), 1);
336 cublasDaxpy(A.GetCublasHandle(), A.GetNoElements(), &alpha,
337 A.GetDataPointer(), 1,
338 B.GetDataPointer(), 1);
342template<
typename AFloat>
348 for (
size_t i = 0; i < A.GetFirstSize(); ++i) {
351 ScaleAdd(
B_m,
A_m, alpha);
356template<
typename AFloat>
361 cudaStream_t s = A.GetComputeStream();
362 ::TMVA::DNN::Cuda::ConstAdd<<<gridDims, blockDims, 0, s>>>(
370template<
typename AFloat>
375 cudaStream_t s = A.GetComputeStream();
376 ::TMVA::DNN::Cuda::ConstMult<<<gridDims, blockDims, 0, s>>>(
384template<
typename AFloat>
389 cudaStream_t s = A.GetComputeStream();
390 ::TMVA::DNN::Cuda::ReciprocalElementWise<<<gridDims, blockDims, 0, s>>>(
397template<
typename AFloat>
402 cudaStream_t s = A.GetComputeStream();
403 ::TMVA::DNN::Cuda::SquareElementWise<<<gridDims, blockDims, 0, s>>>(
410template<
typename AFloat>
415 cudaStream_t s = A.GetComputeStream();
416 ::TMVA::DNN::Cuda::SqrtElementWise<<<gridDims, blockDims, 0, s>>>(
424template<
typename AFloat>
429 cudaStream_t s = A.GetComputeStream();
430 ::TMVA::DNN::Cuda::AdamUpdate<<<gridDims, blockDims, 0, s>>>(
440template<
typename AFloat>
445 cudaStream_t s = A.GetComputeStream();
446 ::TMVA::DNN::Cuda::AdamUpdateFirstMom<<<gridDims, blockDims, 0, s>>>(
450 (
int) A.GetNcols(), beta);
454template<
typename AFloat>
459 cudaStream_t s = A.GetComputeStream();
460 ::TMVA::DNN::Cuda::AdamUpdateSecondMom<<<gridDims, blockDims, 0, s>>>(
464 (
int) A.GetNcols(), beta);
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Fatal(const char *location, const char *msgfmt,...)
Use this function in case of a fatal error. It will abort the program.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
static AFloat GetDeviceReturn()
Transfer the value in the device return buffer to the host.
static AFloat * GetDeviceReturnPointer()
Return device pointer to the device return buffer.
static void ResetDeviceReturn(AFloat value=0.0)
Set the return buffer on the device to the specified value.
static bool AlmostEquals(const Matrix_t &A, const Matrix_t &B, double epsilon=0.1)
Check two matrices for equality, taking floating point arithmetic errors into account.
static void SqrtElementWise(Matrix_t &A)
Take the square root of each element of the matrix A and write the result into A.
static void SumRows(Matrix_t &B, const Matrix_t &A)
extra functions defined only for CPU architecture !!!
static void Multiply(Matrix_t &C, const Matrix_t &A, const Matrix_t &B)
Standard multiplication of two matrices A and B with the result being written into C.
static void AdamUpdate(Matrix_t &A, const Matrix_t &M, const Matrix_t &V, Scalar_t alpha, Scalar_t eps)
Adam updates.
static void SumColumns(Matrix_t &B, const Matrix_t &A, Scalar_t alpha=1.0, Scalar_t beta=0.)
Sum the columns of the (m x n) matrix A and write the results into B.
static void AdamUpdateSecondMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void Hadamard(Tensor_t &A, const Tensor_t &B)
In-place Hadamard (element-wise) product of matrices A and B with the result being written into A.
static void ReciprocalElementWise(Matrix_t &A)
Take the reciprocal of each element of the matrix A and write the result into A.
static void SquareElementWise(Matrix_t &A)
Square each element of the matrix A and write the result into A.
static Scalar_t Sum(const Matrix_t &A)
Compute the sum of all elements in A.
static void ConstMult(Matrix_t &A, Scalar_t beta)
Multiply all the elements of matrix A by the constant beta and write the result into A.
static void AdamUpdateFirstMom(Matrix_t &A, const Matrix_t &B, Scalar_t beta)
static void ConstAdd(Matrix_t &A, Scalar_t beta)
Add the constant beta to all the elements of matrix A and write the result into A.
static void TransposeMultiply(Matrix_t &output, const Matrix_t &input, const Matrix_t &Weights, Scalar_t alpha=1.0, Scalar_t beta=0.)
Matrix multiplication of two matrices A and B^T (transposed) with the result being written into C.
static void ScaleAdd(Matrix_t &A, const Matrix_t &B, Scalar_t beta=1.0)
Adds the elements of matrix B, scaled by beta, to the elements of matrix A.
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
create variable transformations