// Static data members of TCudaTensor<AFloat>
template<typename AFloat>
std::vector<cudnnHandle_t> TCudaTensor<AFloat>::fCudnnHandle(1);

// ... (definitions of the remaining static members elided in this excerpt)

// Compute the strides of a dense tensor from its shape.
// This information is needed for the multi-dimensional indexing.
template<typename AFloat>
std::vector<std::size_t> TCudaTensor<AFloat>::ComputeStridesFromShape(const std::vector<std::size_t> &shape,
                                                                      bool rowmajorLayout)
{
   const auto size = shape.size();
   std::vector<std::size_t> strides(size);
   if (rowmajorLayout) {
      // row-major: the last dimension is contiguous, strides accumulate from the right
      for (std::size_t i = 0; i < size; i++) {
         if (i == 0)
            strides[size - 1 - i] = 1;
         else
            strides[size - 1 - i] = strides[size - 1 - i + 1] * shape[size - 1 - i + 1];
      }
   } else {
      // column-major: the first dimension is contiguous, strides accumulate from the left
      for (std::size_t i = 0; i < size; i++) {
         if (i == 0)
            strides[i] = 1;
         else
            strides[i] = strides[i - 1] * shape[i - 1];
      }
   }
   return strides;
}
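// Illustrative sketch (not part of the original file): the same stride rule applied to
// a concrete shape, as a standalone helper with a hypothetical name. For shape {2, 3, 4}
// a row-major layout yields strides {12, 4, 1}; a column-major layout yields {1, 2, 6}.
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<std::size_t> StridesFromShape(const std::vector<std::size_t> &shape, bool rowMajor)
{
   if (shape.empty()) return {};
   std::vector<std::size_t> strides(shape.size(), 1);
   if (rowMajor) {
      for (std::size_t i = shape.size() - 1; i > 0; --i)
         strides[i - 1] = strides[i] * shape[i];       // accumulate from the right
   } else {
      for (std::size_t i = 1; i < shape.size(); ++i)
         strides[i] = strides[i - 1] * shape[i - 1];   // accumulate from the left
   }
   return strides;
}

int main()
{
   for (auto s : StridesFromShape({2, 3, 4}, true))  std::cout << s << " ";   // prints: 12 4 1
   std::cout << std::endl;
   for (auto s : StridesFromShape({2, 3, 4}, false)) std::cout << s << " ";   // prints: 1 2 6
   std::cout << std::endl;
   return 0;
}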
// Default constructor: an empty tensor with no allocated device memory.
template<typename AFloat>
TCudaTensor<AFloat>::TCudaTensor()
   : fShape(), fStrides(), fNDim(0), fSize(0), fElementBuffer(), fStreamIndx(0), fTensorDescriptor(nullptr)
{
}
// Constructor from a shape and a memory layout (remaining parameters and
// member initializers elided in this excerpt).
template<typename AFloat>
TCudaTensor<AFloat>::TCudaTensor(/* shape, */ MemoryLayout layout /* , device and stream indices */)
   : /* ... */ fTensorDescriptor(nullptr), fMemoryLayout(layout)
{
   // ...
}
// ... (further constructors elided)

// Constructor taking an existing device buffer together with its shape
// (the trailing parameters are reconstructed from the initializer list).
template <typename AFloat>
TCudaTensor<AFloat>::TCudaTensor(TCudaDeviceBuffer<AFloat> buffer,
                                 const std::vector<size_t> & shape,
                                 MemoryLayout layout, int device, int streamIndx)
   : fNDim(shape.size()), fElementBuffer(buffer), fShape(shape), fStrides(shape.size()), fDevice(device),
     fStreamIndx(streamIndx), fTensorDescriptor(nullptr), fMemoryLayout(layout)
{
   // ... (fills fStrides and fSize, then initializes the cuDNN state)
}
// Construct from a 2-D TCudaMatrix, padding the shape with trailing singleton
// dimensions so that the tensor has `dim` dimensions (matrix parameter name inferred).
template <typename AFloat>
TCudaTensor<AFloat>::TCudaTensor(const TCudaMatrix<AFloat> &matrix, size_t dim)
{
   // ...
   fStrides.insert(fStrides.end(), dim - 2, fSize);
   // ...
   // the shape has changed, so the cuDNN descriptor has to be recreated
   SetTensorDescriptor();
}
// Conversion of a 2-D (or effectively 2-D) tensor into a host TMatrixT.
template<typename AFloat>
TCudaTensor<AFloat>::operator TMatrixT<AFloat>() const
{
   // column-major tensors that are really matrices can be copied directly
   if (GetLayout() == MemoryLayout::ColumnMajor &&
       (fNDim == 2 || (fNDim == 3 && GetFirstSize() == 1))) {
      // ...
   }
   if (GetLayout() == MemoryLayout::RowMajor) {
      // ... (row-major data is rearranged into the column-major TMatrixT)
   }
   // ...
}
// Destructor: release the cuDNN descriptor and, for the last tensor of a stream, the cuDNN handle.
template <typename AFloat>
TCudaTensor<AFloat>::~TCudaTensor()
{
   if (fTensorDescriptor && fTensorDescriptor.use_count() == 1) {
      // ... (destroy the cuDNN tensor descriptor)
      fInstances[fStreamIndx]--;
      if (fInstances[fStreamIndx] <= 0) {
         std::cout << "All Cuda tensors are released - destroy cudnn handle " << fInstances[fStreamIndx] << std::endl;
         // ... (destroy fCudnnHandle[fStreamIndx])
      }
   }
}
// Lazily create the per-stream cuDNN handle and the shared tensor descriptor.
template <typename AFloat>
void TCudaTensor<AFloat>::InitializeCuda()
{
   // a cuDNN descriptor is needed only for non-empty tensors with at least two dimensions
   if (!fTensorDescriptor && fSize > 0 && fNDim >= 2) {
      // grow the per-stream bookkeeping when a new stream index shows up
      if (fInstances.size() - 1 < fStreamIndx) {
         fInstances.resize(2 * fStreamIndx + 1, 0);
         // ... (resize fCudnnHandle accordingly)
      }
      // the first tensor on a stream creates the cuDNN handle
      if (fInstances[fStreamIndx] == 0) {
         std::cout << "TCudaTensor::create cudnn handle - cuDNN version " << CUDNN_VERSION << std::endl;
         // ... (cudnnCreate of fCudnnHandle[fStreamIndx])
      }
      // select the cuDNN data type matching AFloat (assignments reconstructed)
      if (std::is_same<AFloat, double>::value) {
         fDataType = CUDNN_DATA_DOUBLE;
      } else if (std::is_same<AFloat, float>::value) {
         fDataType = CUDNN_DATA_FLOAT;
      }

      fTensorDescriptor = std::make_shared<TensorDescriptor>();
      // ... (create the underlying cudnnTensorDescriptor_t)

      fInstances[fStreamIndx]++;
   }
   SetTensorDescriptor();
}
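// Illustrative sketch (not part of the original file): the lazy, reference-counted
// creation of one cuDNN handle per stream slot, reduced to a standalone pair of
// helpers with hypothetical names; error handling is reduced to a status check.
#include <cstddef>
#include <vector>
#include <cudnn.h>

static std::vector<cudnnHandle_t> gHandles;   // one handle per stream slot
static std::vector<int>           gUseCount;  // number of live users per stream slot

cudnnHandle_t AcquireHandle(std::size_t streamIndx)
{
   if (gHandles.size() <= streamIndx) {        // grow the per-stream tables on demand
      gHandles.resize(streamIndx + 1, nullptr);
      gUseCount.resize(streamIndx + 1, 0);
   }
   if (gUseCount[streamIndx]++ == 0) {         // the first user of a slot creates the handle
      cudnnStatus_t status = cudnnCreate(&gHandles[streamIndx]);
      if (status != CUDNN_STATUS_SUCCESS) gHandles[streamIndx] = nullptr;
   }
   return gHandles[streamIndx];
}

void ReleaseHandle(std::size_t streamIndx)
{
   if (--gUseCount[streamIndx] == 0 && gHandles[streamIndx]) {   // the last user destroys it
      cudnnDestroy(gHandles[streamIndx]);
      gHandles[streamIndx] = nullptr;
   }
}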
// Fill the cuDNN tensor descriptor from the current shape, strides and memory layout.
template<typename AFloat>
void TCudaTensor<AFloat>::SetTensorDescriptor()
{
   if (!fTensorDescriptor) return;
   if (fSize == 0) return;

   // cuDNN descriptors need at least four dimensions: 2-D and 3-D tensors are padded to 4-D
   if (fNDim == 4 || (fNDim > 1 && fMemoryLayout == MemoryLayout::ColumnMajor) || fNDim == 2) {
      Shape_t shape = fShape;   // work on a copy of the shape
      if (fMemoryLayout == MemoryLayout::RowMajor)
         shape.insert(shape.end(), 4 - fNDim, 1);     // pad trailing dimensions
      else
         shape.insert(shape.begin(), 4 - fNDim, 1);   // pad leading dimensions

      if (fMemoryLayout == MemoryLayout::RowMajor) {
         // ... (cudnnSetTensor4dDescriptor with the NCHW interpretation of `shape`)
      }
      // ... (column-major case elided)
   } else if (fNDim > 2) {
      // higher-dimensional tensors use the n-dimensional descriptor
      std::vector<int> shape(fShape.begin(), fShape.end());
      std::vector<int> strides(fStrides.begin(), fStrides.end());
      // ... (cudnnSetTensorNdDescriptor with `shape` and `strides`)
   }
}
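// Illustrative sketch (not part of the original file): padding a row-major shape to the
// four dimensions cuDNN expects and filling a 4-D descriptor, along the lines of the
// branch above. Standalone helper with a hypothetical name; float data type assumed.
#include <vector>
#include <cudnn.h>

cudnnTensorDescriptor_t MakeDescriptor4d(std::vector<int> shape)   // row-major, 1 <= ndim <= 4
{
   if (shape.empty() || shape.size() > 4) return nullptr;
   shape.insert(shape.end(), 4 - shape.size(), 1);   // pad trailing dimensions with 1

   cudnnTensorDescriptor_t desc = nullptr;
   if (cudnnCreateTensorDescriptor(&desc) != CUDNN_STATUS_SUCCESS)
      return nullptr;
   // shape is interpreted as {batch size, channels, height, width}
   if (cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                                  shape[0], shape[1], shape[2], shape[3]) != CUDNN_STATUS_SUCCESS) {
      cudnnDestroyTensorDescriptor(desc);
      return nullptr;
   }
   return desc;   // the caller releases it with cudnnDestroyTensorDescriptor(desc)
}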
// ... (several further member functions elided)
// Print the tensor shape followed by its data (optionally truncated to the first elements).
template<typename AFloat>
void TCudaTensor<AFloat>::Print(const char * name, bool truncate) const
{
   PrintShape(name);
   size_t n = fSize;
   if (truncate && n > 10) n = 10;   // show only the first few elements when truncating (limit reconstructed)
   std::cout << "Data : { ";
   for (size_t i = 0; i < n; ++i) {
      AFloat * elementPointer = const_cast<AFloat *>(GetDataPointer()) + i;   // device address of element i (reconstructed)
      std::cout << AFloat( TCudaDeviceReference<AFloat>(elementPointer) );
      if (i < n - 1) std::cout << " , ";
   }
   if (n < fSize) std::cout << "............ } ";
   std::cout << " } " << std::endl;
}
// Print the tensor shape and memory layout.
template<typename AFloat>
void TCudaTensor<AFloat>::PrintShape(const char * name) const
{
   std::string memlayout = (GetLayout() == MemoryLayout::RowMajor) ? "RowMajor" : "ColMajor";
   std::cout << name << " shape : { ";
   for (size_t i = 0; i < fNDim - 1; ++i)
      std::cout << fShape[i] << " , ";
   std::cout << fShape.back() << " } " << " Layout : " << memlayout << std::endl;
}
// Copy the tensor to the host as an RTensor (a contiguous host container with shape information);
// the conversion-operator signature is reconstructed from the copy loop below.
template<typename AFloat>
TCudaTensor<AFloat>::operator Experimental::RTensor<AFloat>() const
{
   Experimental::RTensor<AFloat> hostTensor(GetShape(), GetLayout());

   // stage the device data in a temporary host buffer
   AFloat * buffer = new AFloat[fSize];
   // ... (copy fSize elements from the device buffer into `buffer`)
   for (size_t j = 0; j < fSize; ++j)
      hostTensor.GetData()[j] = static_cast<AFloat>(buffer[j]);

   delete [] buffer;
   return hostTensor;
}
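// Illustrative sketch (not part of the original file): the device-to-host staging copy
// used by the conversion above, written as a standalone helper (hypothetical name) with
// a std::vector instead of a raw new[]/delete[] pair.
#include <cstddef>
#include <vector>
#include <cuda_runtime.h>

std::vector<float> CopyDeviceToHost(const float *devicePtr, std::size_t n)
{
   std::vector<float> host(n);
   // copy n elements from GPU memory into the host vector
   cudaMemcpy(host.data(), devicePtr, n * sizeof(float), cudaMemcpyDeviceToHost);
   return host;
}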
// Members of TCudaTensor<AFloat> referenced above (declared in TCudaTensor.h):
//   Shape_t fShape                            the shape vector (sizes of the dimensions)
//   Shape_t fStrides                          strides between tensor dimensions (always assuming a dense, non-overlapping tensor)
//   size_t fSize                              number of elements
//   TCudaDeviceBuffer<AFloat> fElementBuffer  device buffer holding the tensor data
//   static std::vector<int> fInstances        for each GPU device, keeps the CUDA streams in which tensors are used
//   static std::vector<std::size_t> ComputeStridesFromShape(const std::vector<std::size_t> &shape, bool rowmajorLayout)
//                                             strides are needed for the multi-dimensional indexing
//   void InitializeCuda()                     initializes the shared device resources and the curand states
//   void InitializeCurandStates()
//   void SetTensorDescriptor()
//   void PrintShape(const char *name = "Tensor") const
//   void Print(const char *name = "Tensor", bool truncate = false) const
//
// Related types: TMVA::Experimental::MemoryLayout (memory layout type, copied from RTensor.hxx)
// and TMVA::Experimental::RTensor (a container with contiguous memory and shape information).
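// Illustrative sketch (not part of the original file): how the shape-derived strides
// turn a multi-dimensional index into a flat buffer offset, offset = sum_i index[i] * stride[i].
// For a row-major {2, 3, 4} tensor with strides {12, 4, 1}, element (1, 2, 3) sits at
// offset 1*12 + 2*4 + 3*1 = 23. Standalone helper with a hypothetical name.
#include <cstddef>
#include <vector>

std::size_t FlatOffset(const std::vector<std::size_t> &index, const std::vector<std::size_t> &strides)
{
   std::size_t offset = 0;
   for (std::size_t i = 0; i < index.size(); ++i)
      offset += index[i] * strides[i];
   return offset;
}
// FlatOffset({1, 2, 3}, {12, 4, 1}) == 23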