Logo ROOT   6.14/05
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 21/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////
13 // Implementation of the DataLoader for the multi-threaded //
14 // CPU implementation of DNNs. //
15 //////////////////////////////////////////////////////////////////
16 
17 #include "TMVA/DNN/Architectures/Cpu/DataLoader.h"
18 #include "TMVA/Event.h"
19 #include <iostream>
20 #include <random>
21 
22 namespace TMVA
23 {
24 namespace DNN
25 {
26 
27 // TCpuBatchIterator
28 //______________________________________________________________________________
29 template<typename Data_t, typename Real_t>
30 TCpuBatchIterator<Data_t, Real_t>::TCpuBatchIterator(
31  TCpuDataLoader<Data_t, Real_t> & dataLoader,
32  size_t batchIndex)
33  : fDataLoader(dataLoader), fBatchIndex(batchIndex)
34 {
35  // Nothing to do here.
36 }
37 
38 //______________________________________________________________________________
39 template<typename Data_t, typename Real_t>
41 {
42  return fDataLoader.GetBatch(fBatchIndex);
43 }
44 
45 //______________________________________________________________________________
46 template<typename Data_t, typename Real_t>
47 TCpuBatchIterator<Data_t, Real_t> & TCpuBatchIterator<Data_t, Real_t>::operator++()
48 {
49  fBatchIndex++;
50  return *this;
51 }
52 
53 //______________________________________________________________________________
54 template<typename Data_t, typename Real_t>
55 bool TCpuBatchIterator<Data_t, Real_t>::operator!=(const TCpuBatchIterator & other)
56 {
57  return fBatchIndex != other.GetBatchIndex();
58 }
59 
60 //______________________________________________________________________________
61 template<typename Data_t, typename Real_t>
62 bool TCpuBatchIterator<Data_t, Real_t>::operator==(const TCpuBatchIterator & other)
63 {
64  return fBatchIndex == other.GetBatchIndex();
65 }
66 
67 // TCpuDataLoader
68 //______________________________________________________________________________
69 template<typename Data_t, typename Real_t>
70 TCpuDataLoader<Data_t, Real_t>::TCpuDataLoader(const Data_t &input,
71  size_t nsamples,
72  size_t batchSize,
73  size_t ninputFeatures,
74  size_t noutputFeatures,
75  size_t bufferSize)
76  : fInput(input), fNSamples(nsamples), fBatchSize(batchSize),
77  fBufferSize(bufferSize), fNInputFeatures(ninputFeatures),
78  fNOutputFeatures(noutputFeatures), fNBatches(nsamples / batchSize),
79  fInputMatrices(), fOutputMatrices(), fSampleIndices()
80 {
81  fInputMatrices.reserve(fBufferSize);
82  fOutputMatrices.reserve(fBufferSize);
83  for (size_t i = 0; i < fBufferSize; i++) {
84  fInputMatrices.emplace_back(fBatchSize, fNInputFeatures);
85  fOutputMatrices.emplace_back(fBatchSize, fNOutputFeatures);
86  }
87 
88  fSampleIndices.reserve(fNBatches);
89  for (size_t i = 0; i < fNSamples; i++) {
90  fSampleIndices.emplace_back(i);
91  }
92 }
93 
94 //______________________________________________________________________________
95 template<typename Data_t, typename Real_t>
96 inline void TCpuDataLoader<Data_t, Real_t>::CopyData(size_t batchIndex)
97 {
98  auto copy = [this](UInt_t workerID)
99  {
100  CopyBatch(this->fInputMatrices[workerID % this->fBufferSize],
101  this->fOutputMatrices[workerID % this->fBufferSize],
102  this->fInput,
103  this->fSampleIndices.begin() + sampleIndex,
104  this->fSampleIndices.begin() + sampleIndex + this->fBatchSize);
105  sampleIndex += this->fBatchSize;
106  return 0;
107  };
108 
109  size_t end = std::min(batchIndex + fBufferSize, fNBatches);
110  size_t start = batchIndex;
111  ROOT::TThreadExecutor pool{};
112  pool.Map(copy, ROOT::TSeqI(start, end));
113 }
114 
115 //______________________________________________________________________________
116 template<typename Data_t, typename Real_t>
117 TCpuBatch<Real_t> TCpuDataLoader<Data_t, Real_t>::GetBatch(size_t batchIndex)
118 {
119  size_t fBufferIndex = batchIndex % fBufferSize;
120  if (fBufferIndex == 0) {
121  CopyData(batchIndex);
122  }
123  return TCpuBatch<Real_t>(fInputMatrices[fBufferIndex],
124  fOutputMatrices[fBufferIndex]);
125 }
126 
127 //______________________________________________________________________________
128 template<typename Data_t, typename Real_t>
129 auto TCpuDataLoader<Data_t, Real_t>::begin()
130  -> BatchIterator_t
131 {
132  std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), std::default_random_engine{});
133  return BatchIterator_t(*this, 0);
134 }
135 
136 //______________________________________________________________________________
137 template<typename Data_t, typename Real_t>
138 auto TCpuDataLoader<Data_t, Real_t>::end()
139  -> BatchIterator_t
140 {
141  return BatchIterator_t(*this, fNBatches);
142 }
143 
144 //______________________________________________________________________________
145 template <>
146 void TCpuDataLoader<MatrixInput_t, Double_t>::CopyBatch(
147  Matrix_t &inputMatrix,
148  Matrix_t &outputMatrix,
149  const MatrixInput_t &input,
150  IndexIterator_t indexBegin,
151  IndexIterator_t indexEnd)
152 {
153  auto &in = std::get<0>(input);
154  auto &out = std::get<1>(input);
155 
156  size_t batchIndex = 0;
157  for (IndexIterator_t i = indexBegin; i != indexEnd; i++) {
158  size_t index = *i;
159  for (size_t j = 0; j < (size_t) in.GetNcols(); j++) {
160  inputMatrix(batchIndex, j) = in(index, j);
161  }
162  for (size_t j = 0; j < (size_t) out.GetNcols(); j++) {
163  outputMatrix(batchIndex, j) = out(index, j);
164  }
165  batchIndex++;
166  }
167 }
168 
169 //______________________________________________________________________________
170 template <>
171 void TCpuDataLoader<TMVAInput_t, Double_t>::CopyBatch(
172  Matrix_t &inputMatrix,
173  Matrix_t &outputMatrix,
174  const TMVAInput_t &input,
175  IndexIterator_t indexBegin,
176  IndexIterator_t indexEnd)
177 {
178  size_t batchIndex = 0;
179  for (IndexIterator_t i = indexBegin; i != indexEnd; i++) {
180  size_t index = *i;
181  Event *event = input.at(index);
182  for (size_t j = 0; j < event->GetNVariables(); j++) {
183  inputMatrix(batchIndex, j) = event->GetValue(j);
184  }
185  if (event->GetNTargets() > 0) {
186  for (size_t j = 0; j < event->GetNTargets(); j++) {
187  outputMatrix(batchIndex, j) = event->GetTarget(j);
188  }
189  } else {
190  outputMatrix(batchIndex, 0) = (event->GetClass() == 0) ? 1.0 : 0.0;
191  batchIndex++;
192  }
193  }
194 }
195 
196 // Explicit instantiation.
197 //______________________________________________________________________________
198 template class TCpuDataLoader<MatrixInput_t, Double_t>;
199 template class TCpuDataLoader<TMVAInput_t, Double_t>;
200 template class TCpuBatchIterator<MatrixInput_t, Double_t>;
201 template class TCpuBatchIterator<TMVAInput_t, Double_t>;
202 template class TCpuBatch<Double_t>;
203 
204 } // namespace DNN
205 } // namespace TMVA
std::tuple< const TMatrixT< Double_t > &, const TMatrixT< Double_t > &, const TMatrixT< Double_t > & > MatrixInput_t
Definition: DataLoader.h:38
Bool_t operator!=(const TDatime &d1, const TDatime &d2)
Definition: TDatime.h:104
typename std::vector< size_t >::iterator IndexIterator_t
Definition: DataLoader.h:42
This class provides a simple interface to execute the same task multiple times in parallel...
TTime operator*(const TTime &t1, const TTime &t2)
Definition: TTime.h:85
unsigned int UInt_t
Definition: RtypesCore.h:42
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
Bool_t operator==(const TDatime &d1, const TDatime &d2)
Definition: TDatime.h:102
Abstract ClassifierFactory template that handles arbitrary types.
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition: DataLoader.h:40