Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
CudaInterface.cu
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Author:
4 * Jonas Rembser, CERN 2023
5 *
6 * Copyright (c) 2023, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#include "CudaInterface.h"
14
15#include <stdexcept>
16#include <sstream>
17#include <string>
18
19#define ERRCHECK(err) __checkCudaErrors((err), __func__, __FILE__, __LINE__)
20inline static void __checkCudaErrors(cudaError_t error, std::string func, std::string file, int line)
21{
22 if (error != cudaSuccess) {
23 std::stringstream errMsg;
24 errMsg << func << "(), " << file + ":" << std::to_string(line) << " : " << cudaGetErrorString(error);
25 throw std::runtime_error(errMsg.str());
26 }
27}
28
29namespace RooBatchCompute {
30namespace CudaInterface {
31
32DeviceMemory::DeviceMemory(std::size_t n, std::size_t typeSize) : _size{n}
33{
34 void *ret;
35 ERRCHECK(::cudaMalloc(&ret, n * typeSize));
36 _data.reset(ret);
37}
38PinnedHostMemory::PinnedHostMemory(std::size_t n, std::size_t typeSize) : _size{n}
39{
40 void *ret;
41 ERRCHECK(::cudaMallocHost(&ret, n * typeSize));
42 _data.reset(ret);
43}
44
45template <>
46void Deleter<DeviceMemory>::operator()(void *ptr)
47{
48 ERRCHECK(::cudaFree(ptr));
49 ptr = nullptr;
50}
51template <>
52void Deleter<PinnedHostMemory>::operator()(void *ptr)
53{
54 ERRCHECK(::cudaFreeHost(ptr));
55 ptr = nullptr;
56}
57
58/**
59 * Creates a new CUDA event.
60 *
61 * @param[in] forTiming Set to true if the event is intended for timing purposes.
62 * If `false`, the `cudaEventDisableTiming` is passed to CUDA.
63 * @return CudaEvent object representing the new event.
64 */
65CudaEvent::CudaEvent(bool forTiming)
66{
67 auto event = new cudaEvent_t;
68 ERRCHECK(cudaEventCreateWithFlags(event, forTiming ? 0 : cudaEventDisableTiming));
69 _ptr.reset(event);
70}
71
72template <>
73void Deleter<CudaEvent>::operator()(void *ptr)
74{
75 auto event = reinterpret_cast<cudaEvent_t *>(ptr);
76 ERRCHECK(cudaEventDestroy(*event));
77 delete event;
78 ptr = nullptr;
79}
80
81template <>
82void Deleter<CudaStream>::operator()(void *ptr)
83{
84 auto stream = reinterpret_cast<cudaStream_t *>(ptr);
85 ERRCHECK(cudaStreamDestroy(*stream));
86 delete stream;
87 ptr = nullptr;
88}
89
90/**
91 * Records a CUDA event.
92 *
93 * @param[in] event CudaEvent object representing the event to be recorded.
94 * @param[in] stream CudaStream in which to record the event.
95 */
97{
98 ERRCHECK(::cudaEventRecord(event, stream));
99}
100
101/**
102 * Creates a new CUDA stream.
103 *
104 * @return CudaStream object representing the new stream.
105 */
106CudaStream::CudaStream()
107{
108 auto stream = new cudaStream_t;
109 ERRCHECK(cudaStreamCreate(stream));
110 _ptr.reset(stream);
111}
112
113/**
114 * Checks if a CUDA stream is currently active.
115 *
116 * @return True if the stream is active, false otherwise.
117 */
118bool CudaStream::isActive()
119{
120 cudaError_t err = cudaStreamQuery(*this);
121 if (err == cudaErrorNotReady)
122 return true;
123 else if (err == cudaSuccess)
124 return false;
125 ERRCHECK(err);
126 return false;
127}
128
129/**
130 * Makes a CUDA stream wait for a CUDA event.
131 *
132 * @param[in] event CudaEvent object representing the event to wait for.
133 */
134void CudaStream::waitForEvent(CudaEvent &event)
135{
136 ERRCHECK(::cudaStreamWaitEvent(*this, event, 0));
137}
138
139/**
140 * Calculates the elapsed time between two CUDA events.
141 *
142 * @param[in] begin CudaEvent representing the start event.
143 * @param[in] end CudaEvent representing the end event.
144 * @return Elapsed time in milliseconds.
145 */
147{
148 float ret;
149 ERRCHECK(::cudaEventElapsedTime(&ret, begin, end));
150 return ret;
151}
152
153/// \cond ROOFIT_INTERNAL
154
155void copyHostToDeviceImpl(const void *src, void *dest, size_t nBytes, CudaStream *stream)
156{
157 if (stream)
158 ERRCHECK(cudaMemcpyAsync(dest, src, nBytes, cudaMemcpyHostToDevice, *stream));
159 else
160 ERRCHECK(cudaMemcpy(dest, src, nBytes, cudaMemcpyHostToDevice));
161}
162
163void copyDeviceToHostImpl(const void *src, void *dest, size_t nBytes, CudaStream *stream)
164{
165 if (stream)
166 ERRCHECK(cudaMemcpyAsync(dest, src, nBytes, cudaMemcpyDeviceToHost, *stream));
167 else
168 ERRCHECK(cudaMemcpy(dest, src, nBytes, cudaMemcpyDeviceToHost));
169}
170
171void copyDeviceToDeviceImpl(const void *src, void *dest, size_t nBytes, CudaStream *stream)
172{
173 if (stream)
174 ERRCHECK(cudaMemcpyAsync(dest, src, nBytes, cudaMemcpyDeviceToDevice, *stream));
175 else
176 ERRCHECK(cudaMemcpy(dest, src, nBytes, cudaMemcpyDeviceToDevice));
177}
178
179/// \endcond
180
181} // namespace CudaInterface
182} // namespace RooBatchCompute
#define ERRCHECK(err)
static void __checkCudaErrors(cudaError_t error, std::string func, std::string file, int line)
RooAbsData * _data
Pointer to original input dataset.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t dest
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
virtual void reset()
TLine * line
const Int_t n
Definition legend1.C:16
void cudaEventRecord(CudaEvent &event, CudaStream &stream)
Records a CUDA event.
float cudaEventElapsedTime(CudaEvent &begin, CudaEvent &end)
Calculates the elapsed time between two CUDA events.
Namespace for dispatching RooFit computations to various backends.