Logo ROOT  
Reference Guide
RooBatchCompute.h
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN 6 January 2021
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#ifndef ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
14#define ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
15
17
18#include "DllImport.h" //for R__EXTERN, needed for windows
19#include "TError.h"
20
21#include <functional>
22#include <string>
23
24/**
25 * Namespace for dispatching RooFit computations to various backends.
26 *
27 * This namespace contains an interface for providing high-performance computation functions for use in
28 * RooAbsReal::evaluateSpan(), see RooBatchComputeInterface.
29 *
30 * Furthermore, several implementations of this interface can be created, which reside in RooBatchCompute::RF_ARCH,
31 * where RF_ARCH may be replaced by the architecture that this implementation targets, e.g. SSE, AVX, etc.
32 *
33 * Using the pointers RooBatchCompute::dispatchCPU and RooBatchCompute::dispatchCUDA, a computation request can be
34 * dispatched to the fastest backend that is available on a specific platform.
35 */
36namespace RooBatchCompute {
37
39
74};
75
76/**
77 * \class RooBatchComputeInterface
78 * \ingroup Roobatchcompute
79 * \brief The interface which should be implemented to provide optimised computation functions for implementations of
80 * RooAbsReal::evaluateSpan().
81 *
82 * The class RooBatchComputeInterface provides the mechanism for external modules (like RooFit) to call
83 * functions from the library. The power lies in the virtual functions that can resolve to different
84 * implementations for the functionality; for example, calling a function through dispatchCUDA
85 * will resolve to efficient CUDA implementations.
86 *
87 * This interface contains the signatures of the compute functions of every PDF that has an optimised implementation
88 * available. These are the functions that perform the actual computations in batches.
89 *
90 * Several implementations of this interface may be provided, e.g. SSE, AVX, AVX2 etc. At run time, the fastest
91 * implementation of this interface is selected, and using a virtual call, the computation is dispatched to the best
92 * backend.
93 *
94 * \see RooBatchCompute::dispatch, RooBatchComputeClass, RF_ARCH
95 */
97public:
// Virtual, defaulted destructor: lets an implementation be destroyed correctly through a
// pointer to this interface.
98 virtual ~RooBatchComputeInterface() = default;
// Pure virtual: every implementation of the interface must provide it.
// Arguments, in order: a cudaStream_t pointer, a Computer value, a RestrictArr, a size_t,
// a VarVector and an ArgVector that defaults to an empty one.
// NOTE(review): the parameters are unnamed here; presumably the Computer selects the function
// to evaluate and RestrictArr/size_t are the output buffer and its length - confirm against
// the implementations.
99 virtual void compute(cudaStream_t *, Computer, RestrictArr, size_t, const VarVector &, const ArgVector & = {}) = 0;
// Pure virtual: every implementation of the interface must provide it.
// Takes a cudaStream_t pointer, an input array and a count n, and returns a double.
// NOTE(review): presumably returns the sum of the first n elements of input - confirm
// against the implementations.
100 virtual double sumReduce(cudaStream_t *, InputArr input, size_t n) = 0;
// Pure virtual, const: returns the Architecture value reported by the implementation.
101 virtual Architecture architecture() const = 0;
// Pure virtual, const: returns a std::string reported by the implementation.
// NOTE(review): presumably a human-readable name matching architecture() - confirm.
102 virtual std::string architectureName() const = 0;
103
104 // cuda functions that need to be interfaced
// Default implementation: always throws std::bad_function_call and never returns a pointer.
// NOTE(review): presumably overridden by the CUDA implementation to allocate the requested
// number of bytes on the device - confirm.
105 virtual void *cudaMalloc(size_t) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call.
// NOTE(review): presumably the counterpart of cudaMalloc() in the CUDA implementation - confirm.
106 virtual void cudaFree(void *) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call and never returns a pointer.
// NOTE(review): presumably overridden by the CUDA implementation to allocate host memory of
// the requested size - confirm.
107 virtual void *cudaMallocHost(size_t) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call.
// NOTE(review): presumably the counterpart of cudaMallocHost() in the CUDA implementation - confirm.
108 virtual void cudaFreeHost(void *) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call and never returns an event.
// The bool parameter is unnamed; the inline comment labels it "forTiming".
// NOTE(review): presumably selects whether the created event can be used for timing - confirm.
109 virtual cudaEvent_t *newCudaEvent(bool /*forTiming*/) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call.
// NOTE(review): presumably the counterpart of newCudaEvent() in the CUDA implementation - confirm.
110 virtual void deleteCudaEvent(cudaEvent_t *) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call and never returns a stream.
// NOTE(review): presumably overridden by the CUDA implementation to create a new stream - confirm.
111 virtual cudaStream_t *newCudaStream() { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call.
// NOTE(review): presumably the counterpart of newCudaStream() in the CUDA implementation - confirm.
112 virtual void deleteCudaStream(cudaStream_t *) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call and never returns a value.
// NOTE(review): presumably reports whether work is still pending on the given stream - confirm.
113 virtual bool streamIsActive(cudaStream_t *) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call.
// Takes an event pointer followed by a stream pointer.
// NOTE(review): presumably records the event on the stream in the CUDA implementation - confirm.
114 virtual void cudaEventRecord(cudaEvent_t *, cudaStream_t *) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call.
// Takes a stream pointer followed by an event pointer (the reverse order of cudaEventRecord()).
// NOTE(review): presumably makes the stream wait for the event in the CUDA implementation - confirm.
115 virtual void cudaStreamWaitEvent(cudaStream_t *, cudaEvent_t *) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call and never returns a value.
// Takes two event pointers and is declared to return a float.
// NOTE(review): presumably the elapsed time between the two events; the unit and the argument
// order (start/stop) are not visible here - confirm.
116 virtual float cudaEventElapsedTime(cudaEvent_t *, cudaEvent_t *) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call.
// Arguments, in order: a writable pointer, a read-only pointer, a size_t and a stream pointer
// that defaults to nullptr.
// NOTE(review): presumably (destination, source, byte count) for a host-to-device copy - confirm.
117 virtual void memcpyToCUDA(void *, const void *, size_t, cudaStream_t * = nullptr) { throw std::bad_function_call(); }
// Default implementation: always throws std::bad_function_call.
// Arguments, in order: a writable pointer, a read-only pointer, a size_t and a stream pointer
// that defaults to nullptr.
// NOTE(review): presumably (destination, source, byte count) for a device-to-host copy - confirm.
118 virtual void memcpyToCPU(void *, const void *, size_t, cudaStream_t * = nullptr) { throw std::bad_function_call(); }
119};
120
121/**
122 * This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
123 * Using a virtual call, computation requests are dispatched to backends with architecture-specific functions
124 * such as SSE, AVX, AVX2, etc.
125 *
126 * \see RooBatchComputeInterface, RooBatchComputeClass, RF_ARCH
127 */
129} // End namespace RooBatchCompute
130
131#endif
#define R__EXTERN
Definition: DllImport.h:27
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
The interface which should be implemented to provide optimised computation functions for implementati...
virtual void cudaEventRecord(cudaEvent_t *, cudaStream_t *)
virtual cudaEvent_t * newCudaEvent(bool)
virtual void compute(cudaStream_t *, Computer, RestrictArr, size_t, const VarVector &, const ArgVector &={})=0
virtual std::string architectureName() const =0
virtual float cudaEventElapsedTime(cudaEvent_t *, cudaEvent_t *)
virtual void memcpyToCPU(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual double sumReduce(cudaStream_t *, InputArr input, size_t n)=0
virtual void cudaStreamWaitEvent(cudaStream_t *, cudaEvent_t *)
virtual bool streamIsActive(cudaStream_t *)
virtual void deleteCudaStream(cudaStream_t *)
virtual void deleteCudaEvent(cudaEvent_t *)
virtual void memcpyToCUDA(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual Architecture architecture() const =0
const Int_t n
Definition: legend1.C:16
Namespace for dispatching RooFit computations to various backends.
std::vector< RooSpan< const double > > VarVector
R__EXTERN RooBatchComputeInterface * dispatchCUDA
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
const double *__restrict InputArr
std::vector< double > ArgVector
double *__restrict RestrictArr