Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RooBatchCompute.h
Go to the documentation of this file.
1/*
2 * Project: RooFit
3 * Authors:
4 * Emmanouil Michalainas, CERN 6 January 2021
5 *
6 * Copyright (c) 2021, CERN
7 *
8 * Redistribution and use in source and binary forms,
9 * with or without modification, are permitted according to the terms
10 * listed in LICENSE (http://roofit.sourceforge.net/license.txt)
11 */
12
13#ifndef ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
14#define ROOFIT_BATCHCOMPUTE_ROOBATCHCOMPUTE_H
15
16#include <ROOT/RSpan.hxx>
17
18#include <RConfig.h>
19
20#ifdef R__HAS_CUDA
22#endif
23
24#include <DllImport.h> //for R__EXTERN, needed for windows
25
26#include <Math/Util.h>
27
28#include <cassert>
29#include <functional>
30#include <string>
31#include <vector>
32
33/**
34 * Namespace for dispatching RooFit computations to various backends.
35 *
36 * This namespace contains an interface for providing high-performance computation functions for use in
37 * RooAbsReal::computeBatch(), see RooBatchComputeInterface.
38 *
39 * Furthermore, several implementations of this interface can be created, which reside in RooBatchCompute::RF_ARCH,
40 * where RF_ARCH may be replaced by the architecture that this implementation targets, e.g. SSE, AVX, etc.
41 *
42 * Using the pointers RooBatchCompute::dispatchCPU and RooBatchCompute::dispatchCUDA, a computation request can be
43 * dispatched to the fastest backend that is available on a specific platform.
44 */
45namespace RooBatchCompute {
46
47typedef std::vector<std::span<const double>> VarVector;
48typedef std::vector<double> ArgVector;
49typedef double *__restrict RestrictArr;
50typedef const double *__restrict InputArr;
51
52void init();
53
/// Minimal configuration object that steers the evaluation of a single node
/// with the RooBatchCompute library.
///
/// The only state it can carry is a CUDA stream pointer, and only in builds
/// where R__HAS_CUDA is defined. In all other builds the class has no data
/// members and useCuda() always reports false.
class Config {
#ifdef R__HAS_CUDA
public:
   /// True if a non-null CUDA stream pointer has been set on this configuration.
   bool useCuda() const { return _cudaStream != nullptr; }
   /// Store the given CUDA stream pointer; only the pointer itself is kept.
   void setCudaStream(RooFit::Detail::CudaInterface::CudaStream *cudaStream) { _cudaStream = cudaStream; }
   /// The stored CUDA stream pointer, or nullptr if none has been set.
   RooFit::Detail::CudaInterface::CudaStream *cudaStream() const { return _cudaStream; }

private:
   RooFit::Detail::CudaInterface::CudaStream *_cudaStream = nullptr;
#else
public:
   /// Built without R__HAS_CUDA: always reports false.
   bool useCuda() const { return false; }
#endif
};
71
73
115
118 std::size_t nLargeValues = 0;
119 std::size_t nNonPositiveValues = 0;
120 std::size_t nNaNValues = 0;
121};
122
123/**
124 * \class RooBatchComputeInterface
125 * \ingroup Roobatchcompute
126 * \brief The interface which should be implemented to provide optimised computation functions for implementations of
127 * RooAbsReal::computeBatch().
128 *
129 * The class RooBatchComputeInterface provides the mechanism for external modules (like RooFit) to call
130 * functions from the library. The power lies in the virtual functions that can resolve to different
131 * implementations for the functionality; for example, calling a function through dispatchCUDA
132 * will resolve to efficient CUDA implementations.
133 *
134 * This interface contains the signatures of the compute functions of every PDF that has an optimised implementation
135 * available. These are the functions that perform the actual computations in batches.
136 *
137 * Several implementations of this interface may be provided, e.g. SSE, AVX, AVX2 etc. At run time, the fastest
138 * implementation of this interface is selected, and using a virtual call, the computation is dispatched to the best
139 * backend.
140 *
141 * \see RooBatchCompute::dispatch, RooBatchComputeClass, RF_ARCH
142 */
144public:
145 virtual ~RooBatchComputeInterface() = default;
146 virtual void compute(Config const &cfg, Computer, RestrictArr, size_t, const VarVector &, ArgVector &) = 0;
147 inline void compute(Config const &cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
148 {
149 ArgVector extraArgs{};
150 compute(cfg, comp, output, size, vars, extraArgs);
151 }
152
153 virtual double reduceSum(Config const &cfg, InputArr input, size_t n) = 0;
154 virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span<const double> probas,
155 std::span<const double> weightSpan, std::span<const double> weights,
156 double weightSum, std::span<const double> binVolumes) = 0;
157
158 virtual Architecture architecture() const = 0;
159 virtual std::string architectureName() const = 0;
160};
161
162/**
163 * This dispatch pointer points to an implementation of the compute library, provided one has been loaded.
164 * Using a virtual call, computation requests are dispatched to backends with architecture-specific functions
165 * such as SSE, AVX, AVX2, etc.
166 *
167 * \see RooBatchComputeInterface, RooBatchComputeClass, RF_ARCH
168 */
170
172{
173 init();
174 return dispatchCPU->architecture();
175}
176
177inline std::string cpuArchitectureName()
178{
179 init();
181}
182
183inline bool hasCuda()
184{
185 init();
186 return dispatchCUDA;
187}
188
189inline void
190compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars, ArgVector &extraArgs)
191{
192 init();
193 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
194 dispatch->compute(cfg, comp, output, size, vars, extraArgs);
195}
196
197inline void compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
198{
199 ArgVector extraArgs{};
200 compute(cfg, comp, output, size, vars, extraArgs);
201}
202
203inline double reduceSum(Config cfg, InputArr input, size_t n)
204{
205 init();
206 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
207 return dispatch->reduceSum(cfg, input, n);
208}
209
210inline ReduceNLLOutput reduceNLL(Config cfg, std::span<const double> probas, std::span<const double> weightSpan,
211 std::span<const double> weights, double weightSum, std::span<const double> binVolumes)
212{
213 init();
214 auto dispatch = cfg.useCuda() ? dispatchCUDA : dispatchCPU;
215 return dispatch->reduceNLL(cfg, probas, weightSpan, weights, weightSum, binVolumes);
216}
217
218} // End namespace RooBatchCompute
219
220#endif
#define R__EXTERN
Definition DllImport.h:27
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void input
The Kahan summation is a compensated summation algorithm, which significantly reduces numerical error...
Definition Util.h:122
Minimal configuration struct to steer the evaluation of a single node with the RooBatchCompute librar...
The interface which should be implemented to provide optimised computation functions for implementati...
virtual double reduceSum(Config const &cfg, InputArr input, size_t n)=0
void compute(Config const &cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars)
virtual std::string architectureName() const =0
virtual void compute(Config const &cfg, Computer, RestrictArr, size_t, const VarVector &, ArgVector &)=0
virtual Architecture architecture() const =0
virtual ReduceNLLOutput reduceNLL(Config const &cfg, std::span< const double > probas, std::span< const double > weightSpan, std::span< const double > weights, double weightSum, std::span< const double > binVolumes)=0
const Int_t n
Definition legend1.C:16
Namespace for dispatching RooFit computations to various backends.
R__EXTERN RooBatchComputeInterface * dispatchCUDA
std::string cpuArchitectureName()
R__EXTERN RooBatchComputeInterface * dispatchCPU
This dispatch pointer points to an implementation of the compute library, provided one has been loade...
std::vector< std::span< const double > > VarVector
double reduceSum(Config cfg, InputArr input, size_t n)
Architecture cpuArchitecture()
const double *__restrict InputArr
void init()
Inspect hardware capabilities, and load the optimal library for RooFit computations.
std::vector< double > ArgVector
ReduceNLLOutput reduceNLL(Config cfg, std::span< const double > probas, std::span< const double > weightSpan, std::span< const double > weights, double weightSum, std::span< const double > binVolumes)
void compute(Config cfg, Computer comp, RestrictArr output, size_t size, const VarVector &vars, ArgVector &extraArgs)
double *__restrict RestrictArr
ROOT::Math::KahanSum< double > nllSum
static void output()