// Fragments from the CudaTensor.h source listing:

#ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDATENSOR
#define TMVA_DNN_ARCHITECTURES_CUDA_CUDATENSOR

#define CUDNNCHECK(ans) { cudnnError((ans), __FILE__, __LINE__); }

namespace Experimental {
// ...

template<typename AFloat>
// ...
               const std::vector<size_t> &shape,
// ...
               const std::vector<size_t> &shape,
// ...
   if (memlayout == MemoryLayout::ColumnMajor)
// ...
   TCudaTensor(buffer, {n, m}, MemoryLayout::ColumnMajor, 0, 0) {}
// ...
   for (size_t i = 0; i < fSize; i++) {
// ...
   for (size_t i = 0; i < fSize; i++) {
// ...
   if (fNDim == 2) return 1;
// ...
   if (GetLayout() == MemoryLayout::ColumnMajor &&
// ...
   return (GetLayout() == MemoryLayout::ColumnMajor) ?
// ...
   return (GetLayout() == MemoryLayout::ColumnMajor) ?
// ...
   return At(i).GetMatrix();
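For orientation, here is a minimal usage sketch of the class. This is an illustration, not a verbatim excerpt from the ROOT sources: it assumes a ROOT build with CUDA enabled, that TCudaTensor and TCudaMatrix live in the TMVA::DNN namespace, and that MemoryLayout is the TMVA::Experimental enum referenced above; only members listed on this page are used.

#include "TMVA/DNN/Architectures/Cuda/CudaTensor.h"

using TMVA::DNN::TCudaMatrix;
using TMVA::DNN::TCudaTensor;
using TMVA::Experimental::MemoryLayout;

void TensorSketch()
{
   // 4-d tensor: batch size 32, 3 channels, 16x16 images, row-major layout.
   TCudaTensor<float> t(32, 3, 16, 16, MemoryLayout::RowMajor);

   t.PrintShape("t");                 // prints the shape vector
   size_t first = t.GetFirstSize();   // size of the leading (batch) dimension

   // Slice along the first dimension; shares the underlying device buffer.
   TCudaTensor<float> slice = t.At(0);

   // Reshape without copying device memory; the element count must match.
   TCudaTensor<float> t2 = t.Reshape({32, 3 * 16 * 16});

   // 2-d tensor mapped onto a matrix view (no copy).
   TCudaMatrix<float> m = t2.GetMatrix();
}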
 
 
 
 
TCudaTensor< AFloat > At(size_t i) const
 
const AFloat * GetDataPointerAt(size_t i) const
 
void SetTensorDescriptor()
 
const Shape_t & GetShape() const
 
AFloat * GetDataPointer()
 
std::vector< size_t > Shape_t
 
TCudaTensor(const TMatrixT< AFloat > &m, size_t dim=2)
 
const AFloat * GetData() const
 
size_t GetDimAt(size_t i) const
 
Shape_t fStrides
Strides between tensor dimensions (always assuming a dense, non-overlapping tensor).
 
int fDevice
Device associated with current tensor instance.
 
bool isEqual(TCudaTensor< AFloat > &other)
 
TCudaTensor & operator=(TCudaTensor &&)=default
 
TCudaDeviceReference< AFloat > operator()(size_t i, size_t j) const
 
TCudaMatrix< AFloat > operator[](size_t i) const
 
TCudaTensor(size_t bsize, size_t csize, size_t hsize, size_t wsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
size_t fNDim
Dimension of the tensor (the first dimension is the batch size, the second the number of channels).
 
TCudaTensor & operator=(const TCudaTensor &)=default
 
cudaStream_t GetComputeStream() const
 
void InitializeCuda()
Initializes all shared device resources and makes sure that a sufficient number of curand states are allocated on the device and initialized.
 
MemoryLayout GetLayout() const
 
static std::vector< int > fInstances
For each GPU device keep the CUDA streams in which tensors are used.
 
TCudaDeviceBuffer< AFloat > & GetDeviceBuffer()
 
TCudaTensor(TCudaTensor &&)=default
 
TCudaTensor(size_t bsize, size_t csize, size_t hwsize, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
TCudaTensor(const TCudaTensor &)=default
 
void InitializeCurandStates()
 
Shape_t fShape
The shape vector (size of dimensions) needs to be ordered as no. of channels, image height, image width for a row-major tensor.
 
bool isEqual(const AFloat *hostBufferOther, size_t otherSize)
 
AFloat * GetDataPointerAt(size_t i)
 
TCudaTensor(TCudaDeviceBuffer< AFloat > buffer, size_t n, size_t m)
 
void PrintShape(const char *name="Tensor") const
 
TCudaTensor< AFloat > Reshape(const Shape_t &newShape) const
 
size_t fSize
No. of elements.
 
TCudaMatrix< AFloat > GetMatrix() const
 
TCudaTensor(size_t n, size_t m, MemoryLayout memlayout=MemoryLayout::ColumnMajor, int deviceIndx=0, int streamIndx=0)
 
const AFloat * GetDataPointer() const
 
const Shape_t & GetStrides() const
 
static std::vector< std::size_t > ComputeStridesFromShape(const std::vector< std::size_t > &shape, bool rowmajorLayout)
Computes the strides for each dimension of the tensor from its shape vector; this information is needed for multi-dimensional indexing.
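The stride rule itself is the standard one for dense tensors. The following standalone sketch illustrates the general technique; it is my own illustration under that assumption, not a copy of the ROOT implementation.

#include <cstddef>
#include <vector>

// Dense-stride rule: for a row-major layout the last dimension is contiguous
// (stride 1) and strides grow right-to-left; for a column-major layout the
// first dimension is contiguous and strides grow left-to-right.
std::vector<std::size_t> ComputeStrides(const std::vector<std::size_t> &shape,
                                        bool rowmajorLayout)
{
   std::vector<std::size_t> strides(shape.size(), 1);
   if (rowmajorLayout) {
      for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i)
         strides[i] = strides[i + 1] * shape[i + 1];
   } else {
      for (std::size_t i = 1; i < shape.size(); ++i)
         strides[i] = strides[i - 1] * shape[i - 1];
   }
   return strides;
}

// Example: shape {32, 3, 16, 16} gives strides {768, 256, 16, 1} in
// row-major order and {1, 32, 96, 1536} in column-major order.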
 
void ReshapeInPlace(const Shape_t &newShape)
 
TCudaDeviceBuffer< AFloat > fElementBuffer
 
size_t GetFirstStride() const
 
const TCudaDeviceBuffer< AFloat > & GetDeviceBuffer() const
 
MemoryLayout fMemoryLayout
 
TCudaDeviceReference< AFloat > operator()(size_t i, size_t j, size_t k, size_t l) const
 
void SetComputeStream(cudaStream_t stream)
 
TCudaDeviceReference< AFloat > operator()(size_t i, size_t j, size_t k) const
 
size_t GetFirstSize() const
 
void SetConstVal(const AFloat constVal)
 
int fStreamIndx
CUDA stream associated with the current tensor instance.
 
void Print(const char *name="Tensor", bool truncate=false) const
 
std::shared_ptr< TensorDescriptor > fTensorDescriptor
 
MemoryLayout
Memory layout type (copied from RTensor.hxx).
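Together with the strides, the layout fixes how a multi-index maps to a linear offset in the dense element buffer. A short sketch of that mapping, with a hypothetical helper name:

#include <cstddef>
#include <vector>

// Linear offset of a multi-index in a dense buffer: the dot product of the
// index with the layout-dependent strides.
std::size_t LinearIndex(const std::vector<std::size_t> &idx,
                        const std::vector<std::size_t> &strides)
{
   std::size_t offset = 0;
   for (std::size_t d = 0; d < idx.size(); ++d)
      offset += idx[d] * strides[d];
   return offset;
}

// For a 2-d tensor of shape {n, m}: element (i, j) sits at offset i*m + j in
// row-major order, but at offset i + j*n in column-major order.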
 