Logo ROOT  
Reference Guide
TensorDataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Lorenzo Moneta,
3
4
5////////////////////////////////////////////////////////////////////////
6// Implementation of TensorDataLoader functions for CUDA with CuDNN architecture. //
7////////////////////////////////////////////////////////////////////////
8
9#include "TMVA/DataSetInfo.h"
10
13
15
16
17
18#include "cuda_runtime.h"
19#include <algorithm>
20
21namespace TMVA {
22namespace DNN {
23
24//______________________________________________________________________________
25//
26// cuDNN
27//______________________________________________________________________________
28template <>
30 IndexIterator_t sampleIterator)
31{
32 const std::vector<TMatrixT<Double_t> > &inputTensor = std::get<0>(fData);
33
34 if (fBatchDepth == 1) {
35 for (size_t i = 0; i < fBatchHeight; i++) {
36 size_t sampleIndex = *sampleIterator;
37 for (size_t j = 0; j < fBatchWidth; j++) {
38 size_t bufferIndex = j * fBatchHeight + i;
39 buffer[bufferIndex] = static_cast<float>(inputTensor[0](sampleIndex, j));
40 }
41 sampleIterator++;
42 }
43 } else {
44 for (size_t i = 0; i < fBatchDepth; i++) {
45 size_t sampleIndex = *sampleIterator;
46 for (size_t j = 0; j < fBatchHeight; j++) {
47 for (size_t k = 0; k < fBatchWidth; k++) {
48 size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
49 buffer[bufferIndex] = static_cast<float>(inputTensor[sampleIndex](j, k));
50 }
51 }
52 sampleIterator++;
53 }
54 }
55}
56
57//______________________________________________________________________________
58template <>
60 IndexIterator_t sampleIterator)
61{
62 const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
63 size_t n = outputMatrix.GetNcols();
64
65 for (size_t i = 0; i < fBatchSize; i++) {
66 size_t sampleIndex = *sampleIterator;
67 for (size_t j = 0; j < n; j++) {
68 size_t bufferIndex = j * fBatchSize + i;
69 buffer[bufferIndex] = static_cast<float>(outputMatrix(sampleIndex, j));
70 }
71 sampleIterator++;
72 }
73}
74
75//______________________________________________________________________________
76template <>
78 IndexIterator_t sampleIterator)
79{
80 const TMatrixT<Double_t> &weightMatrix = std::get<2>(fData);
81
82 for (size_t i = 0; i < fBatchSize; i++) {
83 buffer[i] = static_cast<float>(weightMatrix(*sampleIterator, 0));
84 sampleIterator++;
85 }
86}
87
88//______________________________________________________________________________
89template <>
91 IndexIterator_t sampleIterator)
92{
93 // Image has channel depth 1 -> they are ordered as row-vectors in a matrix (batchHeight = batchSize)
94 // one event, one example in the batch
95 if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
96 for (size_t i = 0; i < fBatchHeight; i++) {
97 size_t sampleIndex = *sampleIterator;
98 Event * event = std::get<0>(fData)[sampleIndex];
99 for (size_t j = 0; j < fBatchWidth; j++) {
100 size_t bufferIndex = j * fBatchHeight + i;
101 buffer[bufferIndex] = event->GetValue(j);
102 }
103 sampleIterator++;
104 }
105 // A batch is made up by a single image with its channels
106 } else if (fBatchDepth == fBatchSize) {
107 for (size_t i = 0; i < fBatchSize; i++) {
108 size_t sampleIndex = *sampleIterator;
109 Event * event = std::get<0>(fData)[sampleIndex];
110 for (size_t j = 0; j < fBatchHeight; j++) {
111 for (size_t k = 0; k < fBatchWidth; k++) {
112 // Cudnn order is NCHW
113 size_t bufferIndex = i * fBatchHeight * fBatchWidth + j * fBatchWidth + k;
114 buffer[bufferIndex] = event->GetValue(j * fBatchWidth + k);
115 }
116 }
117 sampleIterator++;
118 }
119 }
120 else {
121 std::cout << fBatchDepth << fBatchSize << fBatchHeight << std::endl;
122 Error("TTensorDataLoader","Inconsistency between batch depth and batch size");
123 R__ASSERT(0);
124 }
125}
126//______________________________________________________________________________
127template <>
129 IndexIterator_t sampleIterator)
130{
131 const DataSetInfo &info = std::get<1>(fData);
132 size_t n = buffer.GetSize() / fBatchSize;
133
134 // Copy target(s).
135 for (size_t i = 0; i < fBatchSize; i++) {
136 size_t sampleIndex = *sampleIterator++;
137 Event *event = std::get<0>(fData)[sampleIndex];
138 for (size_t j = 0; j < n; j++) {
139 // Copy output matrices.
140 size_t bufferIndex = j * fBatchSize + i;
141 // Classification
142 if (event->GetNTargets() == 0) {
143 if (n == 1) {
144 // Binary.
145 buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
146 } else {
147 // Multiclass.
148 buffer[bufferIndex] = 0.0;
149 if (j == event->GetClass()) {
150 buffer[bufferIndex] = 1.0;
151 }
152 }
153 } else {
154 buffer[bufferIndex] = static_cast<Float_t>(event->GetTarget(j));
155 }
156 }
157 }
158}
159
160//______________________________________________________________________________
161template <>
163 IndexIterator_t sampleIterator)
164{
165 for (size_t i = 0; i < fBatchSize; i++) {
166 size_t sampleIndex = *sampleIterator++;
167 Event *event = std::get<0>(fData)[sampleIndex];
168 buffer[i] = event->GetWeight();
169 }
170}
171
172//______________________________________________________________________________
173template <>
175 IndexIterator_t sampleIterator)
176{
177 const std::vector<TMatrixT<Double_t> > &inputTensor = std::get<0>(fData);
178
179 if (fBatchDepth == 1) {
180 for (size_t i = 0; i < fBatchHeight; i++) {
181 size_t sampleIndex = *sampleIterator;
182 for (size_t j = 0; j < fBatchWidth; j++) {
183 size_t bufferIndex = j * fBatchHeight + i;
184 buffer[bufferIndex] = static_cast<double>(inputTensor[0](sampleIndex, j));
185 }
186 sampleIterator++;
187 }
188 } else {
189 for (size_t i = 0; i < fBatchDepth; i++) {
190 size_t sampleIndex = *sampleIterator;
191 for (size_t j = 0; j < fBatchHeight; j++) {
192 for (size_t k = 0; k < fBatchWidth; k++) {
193 size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
194 buffer[bufferIndex] = static_cast<double>(inputTensor[sampleIndex](j, k));
195 }
196 }
197 sampleIterator++;
198 }
199 }
200}
201
202//______________________________________________________________________________
203template <>
205 IndexIterator_t sampleIterator)
206{
207 const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
208 size_t n = outputMatrix.GetNcols();
209
210 for (size_t i = 0; i < fBatchSize; i++) {
211 size_t sampleIndex = *sampleIterator;
212 for (size_t j = 0; j < n; j++) {
213 size_t bufferIndex = j * fBatchSize + i;
214 buffer[bufferIndex] = outputMatrix(sampleIndex, j);
215 }
216 sampleIterator++;
217 }
218}
219
220//______________________________________________________________________________
221template <>
223 IndexIterator_t sampleIterator)
224{
225 const TMatrixT<Double_t> &weightMatrix = std::get<2>(fData);
226 for (size_t i = 0; i < fBatchSize; i++) {
227 buffer[i] = weightMatrix(*sampleIterator, 0);
228 sampleIterator++;
229 }
230}
231
232//______________________________________________________________________________
233template <>
235 IndexIterator_t sampleIterator)
236{
237 // one event, one example in the batch
238 if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
239 for (size_t i = 0; i < fBatchHeight; i++) {
240 size_t sampleIndex = *sampleIterator;
241 Event * event = std::get<0>(fData)[sampleIndex];
242 for (size_t j = 0; j < fBatchWidth; j++) {
243 size_t bufferIndex = j * fBatchHeight + i;
244 buffer[bufferIndex] = event->GetValue(j);
245 }
246 sampleIterator++;
247 }
248 } else if (fBatchDepth == fBatchSize) {
249 // batchDepth is batch size
250 for (size_t i = 0; i < fBatchDepth; i++) {
251 size_t sampleIndex = *sampleIterator;
252 Event * event = std::get<0>(fData)[sampleIndex];
253 for (size_t j = 0; j < fBatchHeight; j++) {
254 for (size_t k = 0; k < fBatchWidth; k++) {
255 // because of the column-major ordering
256 size_t bufferIndex = i * fBatchHeight * fBatchWidth + j * fBatchWidth + k;
257 buffer[bufferIndex] = event->GetValue(j * fBatchWidth + k);
258 }
259 }
260 sampleIterator++;
261 }
262 }
263 else {
264 Error("TTensorDataLoader","Inconsistency between batch depth and batch size");
265 R__ASSERT(0);
266 }
267}
268
269//______________________________________________________________________________
270template <>
272 IndexIterator_t sampleIterator)
273{
274 const DataSetInfo &info = std::get<1>(fData);
275 size_t n = buffer.GetSize() / fBatchSize;
276
277 // Copy target(s).
278
279 for (size_t i = 0; i < fBatchSize; i++) {
280 size_t sampleIndex = *sampleIterator++;
281 Event *event = std::get<0>(fData)[sampleIndex];
282 for (size_t j = 0; j < n; j++) {
283 // Copy output matrices.
284 size_t bufferIndex = j * fBatchSize + i;
285 // Classification
286 if (event->GetNTargets() == 0) {
287 if (n == 1) {
288 // Binary.
289 buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
290 } else {
291 // Multiclass.
292 buffer[bufferIndex] = 0.0;
293 if (j == event->GetClass()) {
294 buffer[bufferIndex] = 1.0;
295 }
296 }
297 } else {
298 buffer[bufferIndex] = static_cast<Double_t>(event->GetTarget(j));
299 }
300 }
301 }
302}
303
304//______________________________________________________________________________
305template <>
307 IndexIterator_t sampleIterator)
308{
309 for (size_t i = 0; i < fBatchSize; i++) {
310 size_t sampleIndex = *sampleIterator++;
311 Event *event = std::get<0>(fData)[sampleIndex];
312 buffer[i] = event->GetWeight();
313 }
314}
315
#if 0
// NOTE: these GetTensorBatch specializations are compiled out; kept for
// reference. They wrap the device buffers returned by CopyTensorBatches()
// into TCudaTensor objects and advance the batch index.
//______________________________________________________________________________
template <>
TTensorBatch<TCudnn<float> > TTensorDataLoader<TensorInput, TCudnn<float> >::GetTensorBatch()
{
   // Get buffer tuple on device that contains the data
   DeviceBufferTuple DeviceBuffers = CopyTensorBatches();

   std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
   std::vector<size_t> weightShape {fBatchSize, 1, 1, 1};
   std::vector<TCudaTensor<float> > inputTensor(1, TCudaTensor<float>(std::get<0>(DeviceBuffers),
                                                this->GetTensorDim(), fInputShape));
   TCudaTensor<float> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
   TCudaTensor<float> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), weightShape);

   fBatchIndex++;
   return TTensorBatch<TCudnn<float> >(inputTensor, outputMatrix, weightMatrix);
}

//______________________________________________________________________________
template <>
TTensorBatch<TCudnn<double> > TTensorDataLoader<TensorInput, TCudnn<double> >::GetTensorBatch()
{
   // Get buffer tuple on device that contains the data
   DeviceBufferTuple DeviceBuffers = CopyTensorBatches();

   std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
   std::vector<size_t> weightShape {fBatchSize, 1, 1, 1};
   std::vector<TCudaTensor<double> > inputTensor(1, TCudaTensor<double>(std::get<0>(DeviceBuffers),
                                                 this->GetTensorDim(), fInputShape));
   TCudaTensor<double> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
   TCudaTensor<double> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), weightShape);

   fBatchIndex++;
   return TTensorBatch<TCudnn<double> >(inputTensor, outputMatrix, weightMatrix);
}

//______________________________________________________________________________
template <>
TTensorBatch<TCudnn<float> > TTensorDataLoader<TMVAInput_t, TCudnn<float> >::GetTensorBatch()
{
   // Get buffer tuple on device that contains the data
   DeviceBufferTuple DeviceBuffers = CopyTensorBatches();

   std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
   std::vector<size_t> weightShape {fBatchSize, 1, 1, 1};
   std::vector<TCudaTensor<float> > inputTensor(1, TCudaTensor<float>(std::get<0>(DeviceBuffers),
                                                this->GetTensorDim(), fInputShape));
   TCudaTensor<float> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
   TCudaTensor<float> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), weightShape);

   fBatchIndex++;
   return TTensorBatch<TCudnn<float> >(inputTensor, outputMatrix, weightMatrix);
}

//______________________________________________________________________________
template <>
TTensorBatch<TCudnn<double> > TTensorDataLoader<TMVAInput_t, TCudnn<double> >::GetTensorBatch()
{
   // Get buffer tuple on device that contains the data
   DeviceBufferTuple DeviceBuffers = CopyTensorBatches();

   std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
   std::vector<size_t> weightShape {fBatchSize, 1, 1, 1};
   std::vector<TCudaTensor<double> > inputTensor(1, TCudaTensor<double>(std::get<0>(DeviceBuffers),
                                                 this->GetTensorDim(), fInputShape));
   // Fixed: previously passed "fNOutputFeatures + 2" and "3" as the tensor
   // dimension, inconsistent with the other three specializations; use
   // this->GetTensorDim() like the rest.
   TCudaTensor<double> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
   TCudaTensor<double> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), weightShape);

   fBatchIndex++;
   return TTensorBatch<TCudnn<double> >(inputTensor, outputMatrix, weightMatrix);
}
#endif
389
390
391//______________________________________________________________________________
392// Explicit Instantiations.
393
394template class TTensorDataLoader<TensorInput, TCudnn<float> >;
395template class TTensorDataLoader<TMVAInput_t, TCudnn<float> >;
396template class TTensorDataLoader<TensorInput, TCudnn<double> >;
397template class TTensorDataLoader<TMVAInput_t, TCudnn<double> >;
398
399} // TMVA
400} // DNN
double Double_t
Definition: RtypesCore.h:55
float Float_t
Definition: RtypesCore.h:53
#define R__ASSERT(e)
Definition: TError.h:96
void Error(const char *location, const char *msgfmt,...)
TCudaHostBuffer.
Definition: CudaBuffers.h:43
size_t GetSize() const
Definition: CudaBuffers.h:84
Class that contains all the data information.
Definition: DataSetInfo.h:60
Bool_t IsSignal(const Event *ev) const
Int_t GetNcols() const
Definition: TMatrixTBase.h:127
const Int_t n
Definition: legend1.C:16
typename std::vector< size_t >::iterator IndexIterator_t
Definition: DataLoader.h:42
create variable transformations