Logo ROOT  
Reference Guide
CudaBuffers.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Simon Pfreundschuh 07/08/16
3
4/*************************************************************************
5 * Copyright (C) 2016, Simon Pfreundschuh *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12////////////////////////////////////////////////////
13// Device and host buffer for CUDA architectures. //
14////////////////////////////////////////////////////
15
16#ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
17#define TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
18
19#include "cuda.h"
20#include "cuda_runtime.h"
21
22#include <memory>
23
24namespace TMVA {
25namespace DNN {
26
27template<typename AFloat>
28class TCudaDeviceBuffer;
29
30/** TCudaHostBuffer
31 *
32 * Wrapper class for pinned memory buffers on the host. Uses
33 * std::shared_ptr with custom destructor to ensure consistent
34 * memory management and allow for easy copying/moving of the
35 * buffers. Copying is asynchronous and will set the cudaStream of the
36 * device buffer so that subsequent computations on the device buffer
37 * can be performed on the same stream.
38 *
39 * \tparam AFloat The floating point type to be stored in the buffers.
40 */
41template<typename AFloat>
43{
44private:
45
46 size_t fOffset; ///< Offset for sub-buffers
47 size_t fSize;
48 mutable cudaStream_t fComputeStream; ///< cudaStream for data transfer
49 std::shared_ptr<AFloat *> fHostPointer; ///< Pointer to the buffer data
50
51 // Custom destructor required to free pinned host memory using cudaFreeHost.
53 {
54 TDestructor() = default;
55 TDestructor(const TDestructor &) = default;
56 TDestructor( TDestructor &&) = default;
57 TDestructor & operator=(const TDestructor &) = default;
59 void operator()(AFloat ** devicePointer);
61
63
64public:
65
66 TCudaHostBuffer(size_t size);
67 TCudaHostBuffer(AFloat *);
68 TCudaHostBuffer() = default;
69 TCudaHostBuffer(const TCudaHostBuffer &) = default;
73
74 /** Return sub-buffer of the current buffer. */
75 TCudaHostBuffer GetSubBuffer(size_t offset, size_t size);
76 /** Sets the entire buffer to a constant value */
77 void SetConstVal(const AFloat constVal);
78
79 operator AFloat * () const;
80
81 inline AFloat & operator[](size_t index);
82 inline AFloat operator[](size_t index) const;
83
84 size_t GetSize() const {return fSize;}
85
86};
87
88/** TCudaDeviceBuffer
89 *
90 * Service class for on-device memory buffers. Uses
91 * std::shared_ptr with custom destructor to ensure consistent
92 * memory management and allow for easy copying/moving. A device
93 * buffer has an associated CUDA compute stream, which is used for
94 * implicit synchronization of data transfers.
95 *
96 * \tparam AFloat The floating point type to be stored in the buffers.
97 */
98template<typename AFloat>
100{
101private:
102
103 size_t fOffset; ///< Offset for sub-buffers
104 size_t fSize;
105 cudaStream_t fComputeStream; ///< cudaStream for data transfer
106 std::shared_ptr<AFloat *> fDevicePointer; ///< Pointer to the buffer data
107
108 // Custom destructor required to free device memory using cudaFree.
110 {
111 TDestructor() = default;
112 TDestructor(const TDestructor &) = default;
113 TDestructor( TDestructor &&) = default;
114 TDestructor & operator=(const TDestructor &) = default;
116 void operator()(AFloat ** devicePointer);
119
120public:
121
122 TCudaDeviceBuffer(size_t size);
123 TCudaDeviceBuffer(size_t size, cudaStream_t stream);
124 TCudaDeviceBuffer(AFloat *, size_t size, cudaStream_t stream);
125 TCudaDeviceBuffer() = default;
130
131 /** Return sub-buffer of the current buffer. */
132 TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size);
133 /** Convert to raw device data pointer.*/
134 operator AFloat * () const;
135
136 void CopyFrom(const TCudaHostBuffer<AFloat> &) const;
137 void CopyTo(const TCudaHostBuffer<AFloat> &) const;
138
139 size_t GetSize() const {return fSize;}
140 cudaStream_t GetComputeStream() const {return fComputeStream;}
141 void SetComputeStream(cudaStream_t stream) {fComputeStream = stream;}
142
143 size_t GetUseCount() const { return fDevicePointer.use_count(); }
144
145};
146
147//
148// Inline Functions.
149//______________________________________________________________________________
150
151template<typename AFloat>
153{
154 return (*fHostPointer + fOffset)[index];
155}
156
// Read-only element access: dereferences fHostPointer to obtain the base
// pointer, advances it by fOffset, and returns the element at `index` by
// value. No bounds check against fSize is performed here.
157template<typename AFloat>
158AFloat TCudaHostBuffer<AFloat>::operator[](size_t index) const
159{
160 return (*fHostPointer + fOffset)[index];
161}
162
163
164} // namespace DNN
165} // namespace TMVA
166#endif
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
TCudaDeviceBuffer.
Definition: CudaBuffers.h:100
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:103
void SetComputeStream(cudaStream_t stream)
Definition: CudaBuffers.h:141
void CopyFrom(const TCudaHostBuffer< AFloat > &) const
void CopyTo(const TCudaHostBuffer< AFloat > &) const
struct TMVA::DNN::TCudaDeviceBuffer::TDestructor fDestructor
TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
TCudaDeviceBuffer & operator=(TCudaDeviceBuffer &&)=default
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:105
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
Definition: CudaBuffers.h:106
TCudaDeviceBuffer(TCudaDeviceBuffer &&)=default
cudaStream_t GetComputeStream() const
Definition: CudaBuffers.h:140
TCudaDeviceBuffer & operator=(const TCudaDeviceBuffer &)=default
TCudaDeviceBuffer(const TCudaDeviceBuffer &)=default
TCudaHostBuffer.
Definition: CudaBuffers.h:43
AFloat & operator[](size_t index)
Definition: CudaBuffers.h:152
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
TCudaHostBuffer & operator=(const TCudaHostBuffer &)=default
size_t GetSize() const
Definition: CudaBuffers.h:84
size_t fOffset
Offset for sub-buffers.
Definition: CudaBuffers.h:46
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
Definition: CudaBuffers.cxx:60
TCudaHostBuffer(TCudaHostBuffer &&)=default
cudaStream_t fComputeStream
cudaStream for data transfer
Definition: CudaBuffers.h:48
void SetConstVal(const AFloat constVal)
Sets the entire buffer to a constant value.
Definition: CudaBuffers.cxx:70
TCudaHostBuffer(const TCudaHostBuffer &)=default
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
Definition: CudaBuffers.h:49
TCudaHostBuffer & operator=(TCudaHostBuffer &&)=default
create variable transformations
TDestructor & operator=(TDestructor &&)=default
TDestructor(const TDestructor &)=default
void operator()(AFloat **devicePointer)
Definition: CudaBuffers.cxx:79
TDestructor & operator=(const TDestructor &)=default
TDestructor & operator=(const TDestructor &)=default
TDestructor & operator=(TDestructor &&)=default
void operator()(AFloat **devicePointer)
Definition: CudaBuffers.cxx:36
TDestructor(const TDestructor &)=default
TDestructor(TDestructor &&)=default