26#include "cuda_runtime.h"
35template <
typename AFloat>
43template <
typename AFloat>
46 AFloat **pointer =
new AFloat *[1];
52template <
typename AFloat>
55 return (fHostPointer) ? *fHostPointer + fOffset :
nullptr;
59template <
typename AFloat>
69template <
typename AFloat>
78template <
typename AFloat>
86template <
typename AFloat>
89 AFloat **pointer =
new AFloat *[1];
96template <
typename AFloat>
98 : fOffset(0),
fSize(
size), fComputeStream(stream), fDestructor()
100 AFloat **pointer =
new AFloat *[1];
106template <
typename AFloat>
108 : fOffset(0),
fSize(
size), fComputeStream(stream), fDestructor()
110 AFloat **pointer =
new AFloat *[1];
116template <
typename AFloat>
126template <
typename AFloat>
129 return (fDevicePointer) ? *fDevicePointer + fOffset :
nullptr;
133template <
typename AFloat>
141template <
typename AFloat>
145 buffer.fComputeStream = fComputeStream;
154 size_t n = inputMatrix.GetNcols();
156 for (
size_t i = 0; i < batchSize; i++) {
158 for (
size_t j = 0;
j <
n;
j++) {
172 size_t n = outputMatrix.GetNcols();
174 for (
size_t i = 0; i < batchSize; i++) {
176 for (
size_t j = 0;
j <
n;
j++) {
190 for (
size_t i = 0; i < batchSize; i++) {
201 Event *
event = std::get<0>(fData)[0];
202 size_t n =
event->GetNVariables();
203 for (
size_t i = 0; i < batchSize; i++) {
206 for (
size_t j = 0;
j <
n;
j++) {
208 buffer[
bufferIndex] =
static_cast<float>(
event->GetValue(
j));
219 size_t n = buffer.GetSize() / batchSize;
223 for (
size_t i = 0; i < batchSize; i++) {
226 for (
size_t j = 0;
j <
n;
j++) {
230 if (event->GetNTargets() == 0) {
237 if (
j == event->GetClass()) {
242 buffer[
bufferIndex] =
static_cast<float>(
event->GetTarget(
j));
253 for (
size_t i = 0; i < batchSize; i++) {
256 buffer[i] =
static_cast<float>(
event->GetWeight());
266 size_t n = inputMatrix.GetNcols();
268 for (
size_t i = 0; i < batchSize; i++) {
270 for (
size_t j = 0;
j <
n;
j++) {
284 size_t n = outputMatrix.GetNcols();
286 for (
size_t i = 0; i < batchSize; i++) {
288 for (
size_t j = 0;
j <
n;
j++) {
302 for (
size_t i = 0; i < batchSize; i++) {
303 buffer[i] =
static_cast<double>(weightMatrix(*
sampleIterator, 0));
313 Event *
event = std::get<0>(fData)[0];
314 size_t n =
event->GetNVariables();
315 for (
size_t i = 0; i < batchSize; i++) {
318 for (
size_t j = 0;
j <
n;
j++) {
331 size_t n = buffer.GetSize() / batchSize;
335 for (
size_t i = 0; i < batchSize; i++) {
338 for (
size_t j = 0;
j <
n;
j++) {
342 if (event->GetNTargets() == 0) {
349 if (
j == event->GetClass()) {
365 for (
size_t i = 0; i < batchSize; i++) {
368 buffer[i] =
static_cast<double>(
event->GetWeight());
377 const std::vector<TMatrixT<Double_t>> &inputTensor = std::get<0>(fData);
379 if (fBatchDepth == 1) {
380 for (
size_t i = 0; i < fBatchHeight; i++) {
382 for (
size_t j = 0;
j < fBatchWidth;
j++) {
389 for (
size_t i = 0; i < fBatchDepth; i++) {
391 for (
size_t j = 0;
j < fBatchHeight;
j++) {
392 for (
size_t k = 0; k < fBatchWidth; k++) {
393 size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight +
j;
408 size_t n = outputMatrix.GetNcols();
410 for (
size_t i = 0; i < fBatchSize; i++) {
412 for (
size_t j = 0;
j <
n;
j++) {
426 for (
size_t i = 0; i < fBatchSize; i++) {
439 if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
440 for (
size_t i = 0; i < fBatchHeight; i++) {
443 for (
size_t j = 0;
j < fBatchWidth;
j++) {
449 }
else if (fBatchDepth == fBatchSize) {
451 for (
size_t i = 0; i < fBatchDepth; i++) {
454 for (
size_t j = 0;
j < fBatchHeight;
j++) {
455 for (
size_t k = 0; k < fBatchWidth; k++) {
457 size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight +
j;
458 buffer[
bufferIndex] =
event->GetValue(
j * fBatchWidth + k);
465 std::cout << fBatchDepth << fBatchSize << fBatchHeight << std::endl;
466 Error(
"TTensorDataLoader",
"Inconsistency between batch depth and batch size");
476 size_t n = buffer.GetSize() / fBatchSize;
480 for (
size_t i = 0; i < fBatchSize; i++) {
483 for (
size_t j = 0;
j <
n;
j++) {
487 if (event->GetNTargets() == 0) {
494 if (
j == event->GetClass()) {
510 for (
size_t i = 0; i < fBatchSize; i++) {
513 buffer[i] =
event->GetWeight();
522 const std::vector<TMatrixT<Double_t>> &inputTensor = std::get<0>(fData);
524 if (fBatchDepth == 1) {
525 for (
size_t i = 0; i < fBatchHeight; i++) {
527 for (
size_t j = 0;
j < fBatchWidth;
j++) {
534 for (
size_t i = 0; i < fBatchDepth; i++) {
536 for (
size_t j = 0;
j < fBatchHeight;
j++) {
537 for (
size_t k = 0; k < fBatchWidth; k++) {
538 size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight +
j;
553 size_t n = outputMatrix.GetNcols();
555 for (
size_t i = 0; i < fBatchSize; i++) {
557 for (
size_t j = 0;
j <
n;
j++) {
572 for (
size_t i = 0; i < fBatchSize; i++) {
585 if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
586 for (
size_t i = 0; i < fBatchHeight; i++) {
589 for (
size_t j = 0;
j < fBatchWidth;
j++) {
595 }
else if (fBatchDepth == fBatchSize) {
597 for (
size_t i = 0; i < fBatchDepth; i++) {
600 for (
size_t j = 0;
j < fBatchHeight;
j++) {
601 for (
size_t k = 0; k < fBatchWidth; k++) {
603 size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight +
j;
604 buffer[
bufferIndex] =
event->GetValue(
j * fBatchWidth + k);
611 std::cout << fBatchDepth << fBatchSize << fBatchHeight << std::endl;
612 Error(
"TTensorDataLoader",
"Inconsistency between batch depth and batch size");
623 size_t n = buffer.GetSize() / fBatchSize;
627 for (
size_t i = 0; i < fBatchSize; i++) {
630 for (
size_t j = 0;
j <
n;
j++) {
634 if (event->GetNTargets() == 0) {
641 if (
j == event->GetClass()) {
657 for (
size_t i = 0; i < fBatchSize; i++) {
660 buffer[i] =
event->GetWeight();
673 std::vector<Matrix_t> inputTensor(std::get<0>(
DeviceBuffers), fBatchSize, )
674 size_t
jump = fBatchHeight * fBatchWidth;
675 for (
size_t i = 0; i < fBatchSize; i++) {
679 Matrix_t outputMatrix(std::get<1>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
680 Matrix_t weightMatrix(std::get<2>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
694 std::vector<Matrix_t> inputTensor;
695 size_t jump = fBatchHeight * fBatchWidth;
696 for (
size_t i = 0; i < fBatchSize; i++) {
700 Matrix_t outputMatrix(std::get<1>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
701 Matrix_t weightMatrix(std::get<2>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
715 std::vector<Matrix_t> inputTensor;
716 size_t jump = fBatchHeight * fBatchWidth;
717 for (
size_t i = 0; i < fBatchSize; i++) {
721 Matrix_t outputMatrix(std::get<1>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
722 Matrix_t weightMatrix(std::get<2>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
736 std::vector<Matrix_t> inputTensor;
737 size_t jump = fBatchHeight * fBatchWidth;
738 for (
size_t i = 0; i < fBatchSize; i++) {
742 Matrix_t outputMatrix(std::get<1>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
743 Matrix_t weightMatrix(std::get<2>(
DeviceBuffers), fBatchSize, fNOutputFeatures);
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
#define R__ASSERT(e)
Checks condition e and reports a fatal error if it's false.
void Error(const char *location, const char *msgfmt,...)
Use this function in case an error occurred.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h offset
size_t fOffset
Offset for sub-buffers.
void CopyFrom(const TCudaHostBuffer< AFloat > &) const
void CopyTo(const TCudaHostBuffer< AFloat > &) const
struct TMVA::DNN::TCudaDeviceBuffer::TDestructor fDestructor
TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
cudaStream_t fComputeStream
cudaStream for data transfer
std::shared_ptr< AFloat * > fDevicePointer
Pointer to the buffer data.
TCudaDeviceBuffer()=default
struct TMVA::DNN::TCudaHostBuffer::TDestructor fDestructor
size_t fOffset
Offset for sub-buffers.
TCudaHostBuffer GetSubBuffer(size_t offset, size_t size)
Return sub-buffer of the current buffer.
TCudaHostBuffer()=default
cudaStream_t fComputeStream
cudaStream for data transfer
void SetConstVal(const AFloat constVal)
Sets the entire buffer to a constant value.
std::shared_ptr< AFloat * > fHostPointer
Pointer to the buffer data.
Class that contains all the data information.
create variable transformations
void operator()(AFloat **devicePointer)
void operator()(AFloat **devicePointer)