30template<
typename AFloat>
 
   31std::vector<cudnnHandle_t> TCudaTensor<AFloat>::fCudnnHandle(1);
 
   32template<
typename AFloat>
 
   36template<
typename AFloat>
 
   42template<
typename AFloat>
 
   46   const auto size = shape.size();
 
   47   std::vector<std::size_t> strides(
size);
 
   49      for (std::size_t i = 0; i < 
size; i++) {
 
   51            strides[
size - 1 - i] = 1;
 
   53            strides[
size - 1 - i] = strides[
size - 1 - i + 1] * shape[
size - 1 - i + 1];
 
   57      for (std::size_t i = 0; i < 
size; i++) {
 
   61            strides[i] = strides[i - 1] * shape[i - 1];
 
 
   70template<
typename AFloat>
 
   72    : 
fShape(), fStrides(), fNDim(0), 
fSize(0), fElementBuffer(), fStreamIndx(0), fTensorDescriptor(nullptr)
 
 
   79template<
typename AFloat>
 
   84      fTensorDescriptor(nullptr), fMemoryLayout(
layout)
 
 
   98template<
typename AFloat>
 
  121template<
typename AFloat>
 
  123                                 const std::vector<size_t> & shape,
 
  126   : fNDim(shape.
size()), fElementBuffer(buffer), 
fShape(shape), fStrides( shape.
size()), fDevice(
device),
 
 
  156template <
typename AFloat>
 
  165      fStrides.insert(fStrides.end(),dim-2,
fSize);
 
  168      SetTensorDescriptor();
 
 
  174template<
typename AFloat>
 
  178   if (GetLayout() == MemoryLayout::ColumnMajor &&
 
  179       (fNDim == 2 || (fNDim == 3 && GetFirstSize() == 1)) ) {
 
  190   if (GetLayout() == MemoryLayout::RowMajor) {
 
 
  209template <
typename AFloat>
 
  212   if (fTensorDescriptor && fTensorDescriptor.use_count() == 1 ) {
 
  219      fInstances[fStreamIndx]--;
 
  222      if (fInstances[fStreamIndx] <= 0) {
 
  223         std::cout << 
"All Cuda tensors are -released - destroy cudnn handle " << fInstances[fStreamIndx] << std::endl;
 
  232template <
typename AFloat>
 
  236   if (!fTensorDescriptor && 
fSize > 0 && fNDim >= 2) {
 
  246      if (fInstances.size() - 1 < fStreamIndx) {
 
  248         fInstances.resize(2 * fStreamIndx + 1, 0);
 
  251      if (fInstances[fStreamIndx] == 0) {
 
  252         std::cout << 
"TCudaTensor::create cudnn handle - cuDNN version " << 
CUDNN_VERSION << std::endl;
 
  270      if (std::is_same<AFloat, double>::value) {
 
  272      } 
else if (std::is_same<AFloat, float>::value) {
 
  277      fTensorDescriptor = std::make_shared<TensorDescriptor>();
 
  282      fInstances[fStreamIndx]++;
 
  285   SetTensorDescriptor();
 
  288template<
typename AFloat>
 
  290      if (!fTensorDescriptor) 
return;
 
  291      if (
fSize == 0) 
return;
 
  296      if (fNDim == 4 || fNDim > 1 && fMemoryLayout == MemoryLayout::ColumnMajor || fNDim == 2) {
 
  302            if (fMemoryLayout == MemoryLayout::RowMajor)
 
  303               shape.insert(shape.end(), 4 - fNDim, 1);
 
  305               shape.insert(shape.begin(), 4 - fNDim, 1);
 
  308         if (fMemoryLayout == MemoryLayout::RowMajor) {
 
  330      } 
else if (fNDim >2  || fNDim > 4) {
 
  336         std::vector<int> strides(fStrides.begin(), fStrides.end());
 
  361template <
typename AFloat>
 
  365template <
typename AFloat>
 
  369template<
typename AFloat>
 
  376template<
typename AFloat>
 
  384template<
typename AFloat>
 
  400   std::cout << 
"Data : { ";
 
  401   for (
size_t i = 0; i < 
n; ++i ) {
 
  403      std::cout << AFloat( TCudaDeviceReference<AFloat>(
elementPointer) );
 
  404      if (i < 
n-1) std::cout << 
" , ";
 
  406   if (
n < 
fSize) std::cout << 
"............   } ";
 
  407   std::cout << 
" } " << std::endl;
 
 
  409template<
typename AFloat>
 
  412      std::string 
memlayout = (GetLayout() == MemoryLayout::RowMajor) ? 
"RowMajor" : 
"ColMajor";
 
  413      std::cout << 
name << 
" shape : { ";
 
  414      for (
size_t i = 0; i < fNDim-1; ++i )
 
  415         std::cout << 
fShape[i] << 
" , ";
 
  416      std::cout << 
fShape.back() << 
" } " << 
" Layout : " << 
memlayout << std::endl;
 
 
  421template<
typename AFloat>
 
  428   AFloat * buffer = 
new AFloat[
fSize];
 
  434         hostTensor.GetData()[
j] = 
static_cast<AFloat
>(buffer[
j]);
 
size_t size(const MatrixT &matrix)
Retrieve the size of a square matrix.
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
void SetTensorDescriptor()
Shape_t fStrides
Strides between tensor dimensions (always assumes a dense, non-overlapping tensor).
void InitializeCuda()
Initializes all shared device resources and makes sure that a sufficient number of curand states are ...
static std::vector< int > fInstances
For each GPU device, keep the CUDA streams in which tensors are used.
void InitializeCurandStates()
Shape_t fShape
The shape vector (size of dimensions) needs to be ordered as no. of channels, image dimensions.
void PrintShape(const char *name="Tensor") const
size_t fSize
No. of elements.
static std::vector< std::size_t > ComputeStridesFromShape(const std::vector< std::size_t > &shape, bool rowmajorLayout)
This information is needed for the multi-dimensional indexing.
TCudaDeviceBuffer< AFloat > fElementBuffer
void Print(const char *name="Tensor", bool truncate=false) const
RTensor is a container with contiguous memory and shape information.
MemoryLayout
Memory layout type (copied from RTensor.hxx).
create variable transformations