29 int i = blockDim.y * blockIdx.y + threadIdx.y;
30 int j = blockDim.x * blockIdx.x + threadIdx.x;
31 int tid = i * gridDim.x + j;
32 curand_init(seed + tid, 0, tid, state + tid);
37template<
typename AFloat>
39template<
typename AFloat>
41template<
typename AFloat>
43template<
typename AFloat>
45template<
typename AFloat>
47template<
typename AFloat>
49template<
typename AFloat>
51template <
typename AFloat>
56template<
typename AFloat>
58 : fNRows(0), fNCols(0), fElementBuffer()
64template<
typename AFloat>
66 : fNRows(
m), fNCols(
n), fElementBuffer(
m *
n, 0)
72template<
typename AFloat>
74 : fNRows(Host.GetNrows()), fNCols(Host.GetNcols()),
75 fElementBuffer(Host.GetNoElements(), 0)
81 for (
size_t j = 0; j <
fNCols; j++) {
82 for (
size_t i = 0; i <
fNRows; i++) {
83 buffer[
index] =
static_cast<AFloat
>(Host(i, j));
89 cudaMemcpyHostToDevice);
93template<
typename AFloat>
96 : fNRows(
m), fNCols(
n), fElementBuffer(buffer)
102template <
typename AFloat>
105 if (fInstances == 0) {
106 cublasCreate(&fCublasHandle);
107 CUDACHECK(cudaMalloc(& fDeviceReturn,
sizeof(AFloat)));
112 if (fNCurandStates > 10000000)
113 std::cout <<
"***** Warning - initialize a BIG curandstate for matrix " << fNRows <<
"," << fNCols <<
" nstate "
114 << fNCurandStates << std::endl;
117 cudaFree(fCurandStates);
120 InitializeCurandStates();
122 if (fNRows > fNOnes) {
127 cudaMalloc(&fOnes, fNRows *
sizeof(AFloat));
128 AFloat * buffer =
new AFloat[fNRows];
129 for (
size_t i = 0; i < fNRows; i++) {
132 cudaMemcpy(fOnes, buffer, fNRows *
sizeof(AFloat),
133 cudaMemcpyHostToDevice);
139template<
typename AFloat>
144 CurandInitializationKernel<<<gridDims, blockDims>>>(time(
nullptr), fCurandStates);
149template<
typename AFloat>
154 AFloat * buffer =
new AFloat[fNRows * fNCols];
155 cudaMemcpy(buffer, fElementBuffer.data(), fNRows * fNCols *
sizeof(AFloat),
156 cudaMemcpyDeviceToHost);
159 for (
size_t j = 0; j < fNCols; j++) {
160 for (
size_t i = 0; i < fNRows; i++) {
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
TCudaDeviceBuffer< AFloat > fElementBuffer
void InitializeCuda()
Initializes all shared devices resource and makes sure that a sufficient number of curand states are ...
void InitializeCurandStates()
static dim3 BlockDims2D()
static dim3 GridDims2D(int nrows, int ncols)
static int NThreads(const AMatrix &A)
__global__ void CurandInitializationKernel(unsigned long long seed, curandState_t *state)
create variable transformations