Logo ROOT  
Reference Guide
RooBatchCompute.h
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN 6 January 2021
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#ifndef ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
14#define ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
15
17
18#include "DllImport.h" //for R__EXTERN, needed for windows
19#include "TError.h"
20
21#include <functional>
22
23/**
24 * Namespace for dispatching RooFit computations to various backends.
25 *
26 * This namespace contains an interface for providing high-performance computation functions for use in RooAbsReal::evaluateSpan(),
27 * see RooBatchComputeInterface.
28 *
29 * Furthermore, several implementations of this interface can be created, which reside in RooBatchCompute::RF_ARCH, where
30 * RF_ARCH may be replaced by the architecture that this implementation targets, e.g. SSE, AVX, etc.
31 *
32 * Using the pointers RooBatchCompute::dispatchCPU and RooBatchCompute::dispatchCUDA, a computation request can be
33 * dispatched to the fastest backend that is available on a specific platform.
34 */
35namespace RooBatchCompute {
36
38
42
43/**
44 * \class RooBatchComputeInterface
45 * \ingroup Roobatchcompute
46 * \brief The interface which should be implemented to provide optimised computation functions for implementations of RooAbsReal::evaluateSpan().
47 *
48 * The class RooBatchComputeInterface provides the mechanism for external modules (like RooFit) to call
49 * functions from the library. The power lies in the virtual functions that can resolve to different
50 * implementations for the functionality; for example, calling a function through dispatchCUDA
51 * will resolve to efficient CUDA implementations.
52 *
53 * This interface contains the signatures of the compute functions of every PDF that has an optimised implementation available.
54 * These are the functions that perform the actual computations in batches.
55 *
56 * Several implementations of this interface may be provided, e.g. SSE, AVX, AVX2 etc. At run time, the fastest implementation of this interface
57 * is selected, and using a virtual call, the computation is dispatched to the best backend.
58 *
59 * \see RooBatchCompute::dispatchCPU, RooBatchCompute::dispatchCUDA, RooBatchComputeClass, RF_ARCH
60 */
62 public:
63 virtual ~RooBatchComputeInterface() = default;
64 virtual void compute(cudaStream_t*, Computer, RestrictArr, size_t, const VarVector&, const ArgVector& ={}) = 0;
65 virtual double sumReduce(cudaStream_t*, InputArr input, size_t n) = 0;
66 virtual Architecture architecture() const = 0;
67 virtual std::string architectureName() const = 0;
68
69 // CUDA functions that need to be interfaced; each default implementation below throws std::bad_function_call unless overridden.
70 virtual void* cudaMalloc(size_t) { throw std::bad_function_call(); }
71 virtual void cudaFree(void*) { throw std::bad_function_call(); }
72 virtual void* cudaMallocHost(size_t) { throw std::bad_function_call(); }
73 virtual void cudaFreeHost(void*) { throw std::bad_function_call(); }
74 virtual cudaEvent_t* newCudaEvent(bool /*forTiming*/) { throw std::bad_function_call(); }
75 virtual void deleteCudaEvent(cudaEvent_t*) { throw std::bad_function_call(); }
76 virtual cudaStream_t* newCudaStream() { throw std::bad_function_call(); }
77 virtual void deleteCudaStream(cudaStream_t*) { throw std::bad_function_call(); }
78 virtual bool streamIsActive(cudaStream_t*) { throw std::bad_function_call(); }
79 virtual void cudaEventRecord(cudaEvent_t*, cudaStream_t*) { throw std::bad_function_call(); }
80 virtual void cudaStreamWaitEvent(cudaStream_t*, cudaEvent_t*) { throw std::bad_function_call(); }
81 virtual float cudaEventElapsedTime(cudaEvent_t*, cudaEvent_t*) { throw std::bad_function_call(); }
82 virtual void memcpyToCUDA(void*, const void*, size_t, cudaStream_t* =nullptr) { throw std::bad_function_call(); }
83 virtual void memcpyToCPU (void*, const void*, size_t, cudaStream_t* =nullptr) { throw std::bad_function_call(); }
84};
85
86/**
87 * This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
88 * Using a virtual call, computation requests are dispatched to backends with architecture-specific functions
89 * such as SSE, AVX, AVX2, etc.
90 *
91 * \see RooBatchComputeInterface, RooBatchComputeClass, RF_ARCH
92 */
94} // End namespace RooBatchCompute
95
96#endif
#define R__EXTERN
Definition: DllImport.h:27
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
The interface which should be implemented to provide optimised computation functions for implementati...
virtual void cudaEventRecord(cudaEvent_t *, cudaStream_t *)
virtual cudaEvent_t * newCudaEvent(bool)
virtual void compute(cudaStream_t *, Computer, RestrictArr, size_t, const VarVector &, const ArgVector &={})=0
virtual std::string architectureName() const =0
virtual float cudaEventElapsedTime(cudaEvent_t *, cudaEvent_t *)
virtual void memcpyToCPU(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual double sumReduce(cudaStream_t *, InputArr input, size_t n)=0
virtual void cudaStreamWaitEvent(cudaStream_t *, cudaEvent_t *)
virtual bool streamIsActive(cudaStream_t *)
virtual void deleteCudaStream(cudaStream_t *)
virtual void deleteCudaEvent(cudaEvent_t *)
virtual void memcpyToCUDA(void *, const void *, size_t, cudaStream_t *=nullptr)
virtual Architecture architecture() const =0
const Int_t n
Definition: legend1.C:16
Namespace for dispatching RooFit computations to various backends.
std::vector< RooSpan< const double > > VarVector
R__EXTERN RooBatchComputeInterface * dispatchCUDA
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
const double *__restrict InputArr
std::vector< double > ArgVector
double *__restrict RestrictArr