Logo ROOT   6.07/09
Reference Guide
Device.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 13/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////////////////
13 // Defines the TDevice class which encapsulates device-specific //
14 // settings for the launching of threads. //
15 ////////////////////////////////////////////////////////////////
16 
17 #ifndef TMVA_DNN_ARCHITECTURES_CUDA_DEVICE
18 #define TMVA_DNN_ARCHITECTURES_CUDA_DEVICE
19 
20 #include "cuda.h"
21 #include "vector_types.h" // definition of dim3
22 #include "CudaMatrix.h"
23 
24 namespace TMVA
25 {
26 namespace DNN
27 {
28 
29 /** TDevice
30  *
31  * The TDevice class provides static functions for the generation of CUDA
32  * grids for kernel launches and is used to encapsulate the distribution
33  * of threads and blocks over the data.
34  *
35  */
36 class TDevice
37 {
38 public:
39  /* Number of threads per block along first dimensions. */
40  static constexpr int BlockDimX = 1;
41  /* Number of threads per block along second dimensions. */
42  static constexpr int BlockDimY = 32;
43  /* Resulting block size. */
44  static constexpr int BlockSize = BlockDimX * BlockDimY;
45 
46  /* Return dim3 object representing the a BlockDimX x BlockDimY 2D
47  * block */
48  static dim3 BlockDims()
49  {
50  return dim3(BlockDimX, BlockDimY);
51  }
52 
53  /* Return 2D dim3 object representing the block grid consisting of two-dimensional
54  * BlockDimX x BlockDimY blocks covering the matrix A */
55  template<typename AFloat>
56  static dim3 GridDims(const TCudaMatrix<AFloat> &A)
57  {
58  int gridDimX = A.GetNcols() / TDevice::BlockDimX;
59  if ((A.GetNcols() % TDevice::BlockDimX) != 0)
60  gridDimX += 1;
61  int gridDimY = A.GetNrows() / TDevice::BlockDimY;
62  if ((A.GetNrows() % TDevice::BlockDimY) != 0)
63  gridDimY += 1;
64  return dim3(gridDimX, gridDimY);
65  }
66 
67  /* Return the number of threads that will be launched for a given matrix \p A */
68  template<typename AFloat>
69  static int NThreads(const TCudaMatrix<AFloat> &A)
70  {
71  int gridDimX = A.GetNcols() / TDevice::BlockDimX;
72  if ((A.GetNcols() % TDevice::BlockDimX) != 0) {
73  gridDimX += 1;
74  }
75  int gridDimY = A.GetNrows() / TDevice::BlockDimY;
76  if ((A.GetNrows() % TDevice::BlockDimY) != 0) {
77  gridDimY += 1;
78  }
79  return gridDimX * gridDimY * TDevice::BlockDimX * TDevice::BlockDimY;
80  }
81 };
82 
83 } // namespace DNN
84 } // namespace TMVA
85 
86 #endif
static constexpr int BlockSize
Definition: Device.h:44
static dim3 BlockDims()
Definition: Device.h:48
static double A[]
static int NThreads(const TCudaMatrix< AFloat > &A)
Definition: Device.h:69
TDevice.
Definition: Device.h:36
static constexpr int BlockDimY
Definition: Device.h:42
size_t GetNcols() const
Definition: CudaMatrix.h:152
Abstract ClassifierFactory template that handles arbitrary types.
size_t GetNrows() const
Definition: CudaMatrix.h:151
static dim3 GridDims(const TCudaMatrix< AFloat > &A)
Definition: Device.h:56
TCudaMatrix Class.
Definition: CudaMatrix.h:98
static constexpr int BlockDimX
Definition: Device.h:40