Logo ROOT   6.14/05
Reference Guide
CudaBuffers.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 07/08/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////
13 // Device and host buffer for CUDA architectures. //
14 ////////////////////////////////////////////////////
15 
16 #ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
17 #define TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
18 
19 #include "cuda.h"
20 #include "cuda_runtime.h"
21 
22 #include <memory>
23 
24 namespace TMVA {
25 namespace DNN {
26 
27 template<typename AFloat>
28 class TCudaDeviceBuffer; // Forward declaration; the class is defined further down in this header.
29 
30 /** TCudaHostBuffer
31  *
32  * Wrapper class for pinned memory buffers on the host. Uses
33  * std::shared_ptr with a custom deleter to ensure consistent
34  * memory management and allow for easy copying/moving of the
35  * buffers. Copying is asynchronous and will set the cudaStream of the
36  * device buffer so that subsequent computations on the device buffer
37  * can be performed on the same stream.
38  *
39  * \tparam AFloat The floating point type to be stored in the buffers.
40  */
41 template<typename AFloat>
42 class TCudaHostBuffer
43 {
44 private:
45 
46  size_t fOffset; ///< Offset added to the base pointer on element access (used for sub-buffers)
47  size_t fSize; ///< Buffer size as reported by GetSize()
48  mutable cudaStream_t fComputeStream; ///< cudaStream for data transfer
49  std::shared_ptr<AFloat *> fHostPointer; ///< Shared handle to the base pointer of the buffer data
50 
51  // Custom deleter for fHostPointer, required to free the pinned host memory. NOTE(review): the original comment named cudaFree; pinned host memory is normally released with cudaFreeHost - confirm in CudaBuffers.cxx.
52  struct TDestructor
53  {
54  TDestructor() = default;
55  TDestructor(const TDestructor &) = default;
56  TDestructor( TDestructor &&) = default;
57  TDestructor & operator=(const TDestructor &) = default;
58  TDestructor & operator=( TDestructor &&) = default;
59  void operator()(AFloat ** devicePointer); ///< Deleter call operator; implemented out of line.
60  } fDestructor;
61 
63 
64 public:
65 
66  TCudaHostBuffer(size_t size);
67  TCudaHostBuffer(AFloat *);
68  TCudaHostBuffer() = default;
69  TCudaHostBuffer(const TCudaHostBuffer &) = default;
70  TCudaHostBuffer( TCudaHostBuffer &&) = default;
71  TCudaHostBuffer & operator=(const TCudaHostBuffer &) = default;
72  TCudaHostBuffer & operator=( TCudaHostBuffer &&) = default;
73 
74  /** Return sub-buffer of the current buffer. */
75  TCudaHostBuffer GetSubBuffer(size_t offset, size_t size);
76 
77  operator AFloat * () const; ///< Convert to raw data pointer.
78 
79  inline AFloat & operator[](size_t index); ///< Mutable access to element index, relative to fOffset.
80  inline AFloat operator[](size_t index) const; ///< Read element index (by value), relative to fOffset.
81 
82  size_t GetSize() const {return fSize;} ///< Return the stored buffer size.
83 
84 };
85 
86 /** TCudaDeviceBuffer
87  *
88  * Service class for on-device memory buffers. Uses
89  * std::shared_ptr with a custom deleter to ensure consistent
90  * memory management and allow for easy copying/moving. A device
91  * buffer has an associated CUDA compute stream, which is used for
92  * implicit synchronization of data transfers.
93  *
94  * \tparam AFloat The floating point type to be stored in the buffers.
95  */
96 template<typename AFloat>
97 class TCudaDeviceBuffer
98 {
99 private:
100 
101  size_t fOffset; ///< Offset for sub-buffers
102  size_t fSize; ///< Buffer size (units not visible in this header; see the constructors' implementation)
103  cudaStream_t fComputeStream; ///< cudaStream for data transfer
104  std::shared_ptr<AFloat *> fDevicePointer; ///< Shared handle to the base pointer of the buffer data
105 
106  // Custom deleter for fDevicePointer, required to free the device memory using cudaFree.
107  struct TDestructor
108  {
109  TDestructor() = default;
110  TDestructor(const TDestructor &) = default;
111  TDestructor( TDestructor &&) = default;
112  TDestructor & operator=(const TDestructor &) = default;
113  TDestructor & operator=( TDestructor &&) = default;
114  void operator()(AFloat ** devicePointer); ///< Deleter call operator; implemented out of line.
116  } fDestructor;
117 
118 public:
119 
120  TCudaDeviceBuffer(size_t size);
121  TCudaDeviceBuffer(size_t size, cudaStream_t stream);
122  TCudaDeviceBuffer(AFloat *, size_t size, cudaStream_t stream);
123  TCudaDeviceBuffer() = default;
124  TCudaDeviceBuffer(const TCudaDeviceBuffer &) = default;
125  TCudaDeviceBuffer( TCudaDeviceBuffer &&) = default;
126  TCudaDeviceBuffer & operator=(const TCudaDeviceBuffer &) = default;
127  TCudaDeviceBuffer & operator=( TCudaDeviceBuffer &&) = default;
128 
129  /** Return sub-buffer of the current buffer. */
130  TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size);
131  /** Convert to raw device data pointer.*/
132  operator AFloat * () const;
133 
134  void CopyFrom(const TCudaHostBuffer<AFloat> &) const; ///< Copy using the given host buffer as source.
135  void CopyTo(const TCudaHostBuffer<AFloat> &) const; ///< Copy using the given host buffer as destination.
136 
137  cudaStream_t GetComputeStream() const {return fComputeStream;} ///< Return the associated compute stream.
138  void SetComputeStream(cudaStream_t stream) {fComputeStream = stream;} ///< Replace the associated compute stream.
139 
140 };
141 
142 //
143 // Inline Functions.
144 //______________________________________________________________________________
145 
146 template<typename AFloat>
147 AFloat & TCudaHostBuffer<AFloat>::operator[](size_t index)
148 {
149  return (*fHostPointer + fOffset)[index]; // mutable reference to element index, counted from base pointer + fOffset
150 }
151 
152 template<typename AFloat>
153 AFloat TCudaHostBuffer<AFloat>::operator[](size_t index) const
154 {
155  return *(*fHostPointer + fOffset + index); // copy of element index, counted from base pointer + fOffset
156 }
157 
158 
159 } // namespace DNN
160 } // namespace TMVA
161 #endif
TDestructor & operator=(const TDestructor &)=default
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
Definition: CudaBuffers.h:49
void SetComputeStream(cudaStream_t stream)
Definition: CudaBuffers.h:138
TCudaDeviceBuffer.
Definition: CudaBuffers.h:28
size_t GetSize() const
Definition: CudaBuffers.h:82
cudaStream_t GetComputeStream() const
Definition: CudaBuffers.h:137
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:101
AFloat & operator[](size_t index)
Definition: CudaBuffers.h:147
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:48
TCudaHostBuffer.
Definition: CudaBuffers.h:42
void operator()(AFloat **devicePointer)
Definition: CudaBuffers.cxx:33
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
Definition: CudaBuffers.h:104
Abstract ClassifierFactory template that handles arbitrary types.
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
Definition: CudaBuffers.cxx:57
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:46
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:103