26#include "cuda_runtime.h" 
   35template <
typename AFloat>
 
   43template <
typename AFloat>
 
   46   AFloat **pointer = 
new AFloat *[1];
 
 
   52template <
typename AFloat>
 
   55   return (fHostPointer) ? *fHostPointer + fOffset : 
nullptr;
 
 
   59template <
typename AFloat>
 
   69template <
typename AFloat>
 
   78template <
typename AFloat>
 
   86template <
typename AFloat>
 
   89   AFloat **pointer = 
new AFloat *[1];
 
 
   96template <
typename AFloat>
 
   98   : fOffset(0), 
fSize(
size), fComputeStream(stream), fDestructor()
 
  100   AFloat **pointer = 
new AFloat *[1];
 
 
  106template <
typename AFloat>
 
  108   : fOffset(0), 
fSize(
size), fComputeStream(stream), fDestructor()
 
  110   AFloat **pointer = 
new AFloat *[1];
 
 
  116template <
typename AFloat>
 
  126template <
typename AFloat>
 
  129   return (fDevicePointer) ? *fDevicePointer + fOffset : 
nullptr;
 
 
  133template <
typename AFloat>
 
  141template <
typename AFloat>
 
  145   buffer.fComputeStream = fComputeStream;
 
 
  154   size_t n = inputMatrix.GetNcols();
 
  156   for (
size_t i = 0; i < batchSize; i++) {
 
  158      for (
size_t j = 0; 
j < 
n; 
j++) {
 
 
  172   size_t n = outputMatrix.GetNcols();
 
  174   for (
size_t i = 0; i < batchSize; i++) {
 
  176      for (
size_t j = 0; 
j < 
n; 
j++) {
 
 
  190   for (
size_t i = 0; i < batchSize; i++) {
 
 
  201   Event *
event = std::get<0>(fData)[0];
 
  202   size_t n  = 
event->GetNVariables();
 
  203   for (
size_t i = 0; i < batchSize; i++) {
 
  206      for (
size_t j = 0; 
j < 
n; 
j++) {
 
  208         buffer[
bufferIndex] = 
static_cast<float>(
event->GetValue(
j));
 
 
  219  size_t n = buffer.GetSize() / batchSize;
 
  223  for (
size_t i = 0; i < batchSize; i++) {
 
  226    for (
size_t j = 0; 
j < 
n; 
j++) {
 
  230      if (event->GetNTargets() == 0) {
 
  237          if (
j == event->GetClass()) {
 
  242        buffer[
bufferIndex] = 
static_cast<float>(
event->GetTarget(
j));
 
 
  253   for (
size_t i = 0; i < batchSize; i++) {
 
  256      buffer[i] = 
static_cast<float>(
event->GetWeight());
 
 
  266   size_t n = inputMatrix.GetNcols();
 
  268   for (
size_t i = 0; i < batchSize; i++) {
 
  270      for (
size_t j = 0; 
j < 
n; 
j++) {
 
 
  284   size_t n = outputMatrix.GetNcols();
 
  286   for (
size_t i = 0; i < batchSize; i++) {
 
  288      for (
size_t j = 0; 
j < 
n; 
j++) {
 
 
  302   for (
size_t i = 0; i < batchSize; i++) {
 
  303      buffer[i] = 
static_cast<double>(weightMatrix(*
sampleIterator, 0));
 
 
  313   Event *
event = std::get<0>(fData)[0];
 
  314   size_t n  = 
event->GetNVariables();
 
  315   for (
size_t i = 0; i < batchSize; i++) {
 
  318      for (
size_t j = 0; 
j < 
n; 
j++) {
 
 
  331  size_t n = buffer.GetSize() / batchSize;
 
  335  for (
size_t i = 0; i < batchSize; i++) {
 
  338    for (
size_t j = 0; 
j < 
n; 
j++) {
 
  342      if (event->GetNTargets() == 0) {
 
  349          if (
j == event->GetClass()) {
 
 
  365   for (
size_t i = 0; i < batchSize; i++) {
 
  368      buffer[i] = 
static_cast<double>(
event->GetWeight());
 
 
  377   const std::vector<TMatrixT<Double_t>> &inputTensor = std::get<0>(fData);
 
  379   if (fBatchDepth == 1) {
 
  380      for (
size_t i = 0; i < fBatchHeight; i++) {
 
  382         for (
size_t j = 0; 
j < fBatchWidth; 
j++) {
 
  389      for (
size_t i = 0; i < fBatchDepth; i++) {
 
  391         for (
size_t j = 0; 
j < fBatchHeight; 
j++) {
 
  392            for (
size_t k = 0; k < fBatchWidth; k++) {
 
  393               size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + 
j;
 
 
  408   size_t n = outputMatrix.GetNcols();
 
  410   for (
size_t i = 0; i < fBatchSize; i++) {
 
  412      for (
size_t j = 0; 
j < 
n; 
j++) {
 
 
  426   for (
size_t i = 0; i < fBatchSize; i++) {
 
 
  439   if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
 
  440      for (
size_t i = 0; i < fBatchHeight; i++) {
 
  443         for (
size_t j = 0; 
j < fBatchWidth; 
j++) {
 
  449   } 
else if (fBatchDepth == fBatchSize) {
 
  451      for (
size_t i = 0; i < fBatchDepth; i++) {
 
  454         for (
size_t j = 0; 
j < fBatchHeight; 
j++) {
 
  455            for (
size_t k = 0; k < fBatchWidth; k++) {
 
  457               size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + 
j;
 
  458               buffer[
bufferIndex] = 
event->GetValue(
j * fBatchWidth + k);
 
  465      std::cout  << fBatchDepth << fBatchSize << fBatchHeight << std::endl;
 
  466      Error(
"TTensorDataLoader",
"Inconsistency between batch depth and batch size");
 
 
  476   size_t n = buffer.GetSize() / fBatchSize;
 
  480   for (
size_t i = 0; i < fBatchSize; i++) {
 
  483      for (
size_t j = 0; 
j < 
n; 
j++) {
 
  487         if (event->GetNTargets() == 0) {
 
  494               if (
j == event->GetClass()) {
 
 
  510   for (
size_t i = 0; i < fBatchSize; i++) {
 
  513      buffer[i] = 
event->GetWeight();
 
 
  522   const std::vector<TMatrixT<Double_t>> &inputTensor = std::get<0>(fData);
 
  524   if (fBatchDepth == 1) {
 
  525      for (
size_t i = 0; i < fBatchHeight; i++) {
 
  527         for (
size_t j = 0; 
j < fBatchWidth; 
j++) {
 
  534      for (
size_t i = 0; i < fBatchDepth; i++) {
 
  536         for (
size_t j = 0; 
j < fBatchHeight; 
j++) {
 
  537            for (
size_t k = 0; k < fBatchWidth; k++) {
 
  538               size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + 
j;
 
 
  553   size_t n = outputMatrix.GetNcols();
 
  555   for (
size_t i = 0; i < fBatchSize; i++) {
 
  557      for (
size_t j = 0; 
j < 
n; 
j++) {
 
 
  572   for (
size_t i = 0; i < fBatchSize; i++) {
 
 
  585   if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
 
  586      for (
size_t i = 0; i < fBatchHeight; i++) {
 
  589         for (
size_t j = 0; 
j < fBatchWidth; 
j++) {
 
  595   } 
else if (fBatchDepth == fBatchSize) {
 
  597      for (
size_t i = 0; i < fBatchDepth; i++) {
 
  600         for (
size_t j = 0; 
j < fBatchHeight; 
j++) {
 
  601            for (
size_t k = 0; k < fBatchWidth; k++) {
 
  603               size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + 
j;
 
  604               buffer[
bufferIndex] = 
event->GetValue(
j * fBatchWidth + k);
 
  611      std::cout  << fBatchDepth << fBatchSize << fBatchHeight << std::endl;
 
  612      Error(
"TTensorDataLoader",
"Inconsistency between batch depth and batch size");
 
 
  623   size_t n = buffer.GetSize() / fBatchSize;
 
  627   for (
size_t i = 0; i < fBatchSize; i++) {
 
  630      for (
size_t j = 0; 
j < 
n; 
j++) {
 
  634         if (event->GetNTargets() == 0) {
 
  641               if (
j == event->GetClass()) {
 
 
  657   for (
size_t i = 0; i < fBatchSize; i++) {
 
  660      buffer[i] = 
event->GetWeight();
 
 
  673   std::vector<Matrix_t> inputTensor(std::get<0>(
DeviceBuffers), fBatchSize, )
 
  674   size_t 
jump = fBatchHeight * fBatchWidth;
 
  675   for (
size_t i = 0; i < fBatchSize; i++) {
 
  679   Matrix_t outputMatrix(std::get<1>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
 
  680   Matrix_t weightMatrix(std::get<2>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
 
  694   std::vector<Matrix_t> inputTensor;
 
  695   size_t jump = fBatchHeight * fBatchWidth;
 
  696   for (
size_t i = 0; i < fBatchSize; i++) {
 
  700   Matrix_t outputMatrix(std::get<1>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
 
  701   Matrix_t weightMatrix(std::get<2>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
 
  715   std::vector<Matrix_t> inputTensor;
 
  716   size_t jump = fBatchHeight * fBatchWidth;
 
  717   for (
size_t i = 0; i < fBatchSize; i++) {
 
  721   Matrix_t outputMatrix(std::get<1>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
 
  722   Matrix_t weightMatrix(std::get<2>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
 
  736   std::vector<Matrix_t> inputTensor;
 
  737   size_t jump = fBatchHeight * fBatchWidth;
 
  738   for (
size_t i = 0; i < fBatchSize; i++) {
 
  742   Matrix_t outputMatrix(std::get<1>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
 
  743   Matrix_t weightMatrix(std::get<2>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
 
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
 
float Float_t
Float 4 bytes (float)
 
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
 
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
 
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
 
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
 
size_t fOffset
Offset for sub-buffers.
 
void CopyFrom(const TCudaHostBuffer< AFloat > &) const
 
void CopyTo(const TCudaHostBuffer< AFloat > &) const
 
struct TMVA::DNN::TCudaDeviceBuffer::TDestructor fDestructor
 
TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
 
cudaStream_t fComputeStream
cudaStream for data transfer
 
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
 
TCudaDeviceBuffer()=default
 
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
 
size_t fOffset
Offset for sub-buffers.
 
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
 
TCudaHostBuffer()=default
 
cudaStream_t fComputeStream
cudaStream for data transfer
 
void SetConstVal(const AFloat constVal)
Sets the entire buffer to a constant value.
 
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
 
Class that contains all the data information.
 
create variable transformations
 
void operator()(AFloat **devicePointer)
 
void operator()(AFloat **devicePointer)