Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
Buffers.cxx
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Jonas Rembser, CERN 11/2021
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
14
15#include <RooBatchCompute.h>
16
17#include <functional>
18#include <queue>
19#include <map>
20
21namespace ROOT {
22namespace Experimental {
23namespace Detail {
24
26public:
29 {
30 if (size != 1)
31 throw std::runtime_error("ScalarBufferContainer can only be of size 1");
32 }
33 std::size_t size() const { return 1; }
34
35 double const *cpuReadPtr() const { return &_val; }
36 double const *gpuReadPtr() const { return &_val; }
37
38 double *cpuWritePtr() { return &_val; }
39 double *gpuWritePtr() { return &_val; }
40
41private:
42 double _val;
43};
44
46public:
48 CPUBufferContainer(std::size_t size) : _vec(size) {}
49 std::size_t size() const { return _vec.size(); }
50
51 double const *cpuReadPtr() const { return _vec.data(); }
52 double const *gpuReadPtr() const
53 {
54 throw std::bad_function_call();
55 return nullptr;
56 }
57
58 double *cpuWritePtr() { return _vec.data(); }
59 double *gpuWritePtr()
60 {
61 throw std::bad_function_call();
62 return nullptr;
63 }
64
65private:
66 std::vector<double> _vec;
67};
68
70public:
73 {
74 _data = static_cast<double *>(RooBatchCompute::dispatchCUDA->cudaMalloc(size * sizeof(double)));
75 _size = size;
76 }
78 {
79 if (_data)
81 }
84 GPUBufferContainer(GPUBufferContainer &&other) { *this = std::move(other); }
86 {
87 _data = other._data;
88 other._data = nullptr;
89 _size = other._size;
90 other._size = 0;
91 return *this;
92 }
93 std::size_t size() const { return _size; }
94
95 double const *cpuReadPtr() const
96 {
97 throw std::bad_function_call();
98 return nullptr;
99 }
100 double const *gpuReadPtr() const { return static_cast<double *>(_data); }
101
102 double *cpuWritePtr() const
103 {
104 throw std::bad_function_call();
105 return nullptr;
106 }
107 double *gpuWritePtr() const { return static_cast<double *>(_data); }
108
109private:
110 double *_data = nullptr;
111 std::size_t _size;
112};
113
115public:
118 {
119 _data = static_cast<double *>(RooBatchCompute::dispatchCUDA->cudaMallocHost(size * sizeof(double)));
120 _size = size;
122 }
125 PinnedBufferContainer(PinnedBufferContainer &&other) { *this = std::move(other); }
127 {
128 _data = other._data;
129 other._data = nullptr;
130 _size = other._size;
131 other._size = 0;
132 _gpuBuffer = std::move(other._gpuBuffer);
133 return *this;
134 }
135 std::size_t size() const { return _size; }
136
137 void setCudaStream(cudaStream_t *stream) { _cudaStream = stream; }
138
139 double const *cpuReadPtr() const
140 {
141
145 }
146
148 return static_cast<double *>(_data);
149 }
150 double const *gpuReadPtr() const
151 {
152
156 }
157
159 return _gpuBuffer.gpuReadPtr();
160 }
161
162 double *cpuWritePtr()
163 {
165 return static_cast<double *>(_data);
166 }
167 double *gpuWritePtr()
168 {
170 return _gpuBuffer.gpuWritePtr();
171 }
172
173private:
175
176 double *_data = nullptr;
177 std::size_t _size;
179 cudaStream_t *_cudaStream = nullptr;
181};
182
183template <class Container>
184class BufferImpl : public AbsBuffer {
185public:
186 using Queue = std::queue<Container>;
187 using QueuesMap = std::map<std::size_t, Queue>;
188
189 BufferImpl(std::size_t size, QueuesMap &queuesMap) : _queue{queuesMap[size]}
190 {
191 if (_queue.empty()) {
192 _vec = Container(size);
193 } else {
194 _vec = std::move(_queue.front());
195 _queue.pop();
196 }
197 }
198
199 ~BufferImpl() override { _queue.emplace(std::move(_vec)); }
200
201 double const *cpuReadPtr() const override { return _vec.cpuReadPtr(); }
202 double const *gpuReadPtr() const override { return _vec.gpuReadPtr(); }
203
204 double *cpuWritePtr() override { return _vec.cpuWritePtr(); }
205 double *gpuWritePtr() override { return _vec.gpuWritePtr(); }
206
207 Container &vec() { return _vec; }
208
209private:
210 Container _vec;
212};
213
218
224};
225
227{
229}
230
232{
233 delete _queuesMaps;
234}
235
237{
239}
241{
243}
245{
247}
248AbsBuffer *BufferManager::makePinnedBuffer(std::size_t size, cudaStream_t *stream)
249{
251 out->vec().setCudaStream(stream);
252 return out;
253}
254
255} // end namespace Detail
256} // end namespace Experimental
257} // end namespace ROOT
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
BufferImpl(std::size_t size, QueuesMap &queuesMap)
Definition Buffers.cxx:189
std::map< std::size_t, Queue > QueuesMap
Definition Buffers.cxx:187
std::queue< Container > Queue
Definition Buffers.cxx:186
double const * gpuReadPtr() const override
Definition Buffers.cxx:202
double const * cpuReadPtr() const override
Definition Buffers.cxx:201
AbsBuffer * makeCpuBuffer(std::size_t size)
Definition Buffers.cxx:240
AbsBuffer * makePinnedBuffer(std::size_t size, cudaStream_t *stream=nullptr)
Definition Buffers.cxx:248
AbsBuffer * makeGpuBuffer(std::size_t size)
Definition Buffers.cxx:244
GPUBufferContainer(const GPUBufferContainer &)=delete
GPUBufferContainer & operator=(const GPUBufferContainer &)=delete
GPUBufferContainer(GPUBufferContainer &&other)
Definition Buffers.cxx:84
GPUBufferContainer & operator=(GPUBufferContainer &&other)
Definition Buffers.cxx:85
PinnedBufferContainer & operator=(const PinnedBufferContainer &)=delete
PinnedBufferContainer(const PinnedBufferContainer &)=delete
PinnedBufferContainer & operator=(PinnedBufferContainer &&other)
Definition Buffers.cxx:126
PinnedBufferContainer(PinnedBufferContainer &&other)
Definition Buffers.cxx:125
virtual void memcpyToCPU(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual void memcpyToCUDA(void *, const void *, size_t, cudaStream_t *=nullptr)
tbb::task_arena is an alias of tbb::interface7::task_arena, which doesn't allow to forward declare tb...
R__EXTERN RooBatchComputeInterface * dispatchCUDA
PinnedBuffer::QueuesMap pinnedBufferQueuesMap
Definition Buffers.cxx:223
ScalarBuffer::QueuesMap scalarBufferQueuesMap
Definition Buffers.cxx:220