Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
RBatchLoader.hxx
Go to the documentation of this file.
1// Author: Dante Niewenhuis, VU Amsterdam 07/2023
2// Author: Kristupas Pranckietis, Vilnius University 05/2024
3// Author: Nopphakorn Subsa-Ard, King Mongkut's University of Technology Thonburi (KMUTT) (TH) 08/2024
4// Author: Vincenzo Eduardo Padulano, CERN 10/2024
5
6/*************************************************************************
7 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. *
8 * All rights reserved. *
9 * *
10 * For the licensing terms see $ROOTSYS/LICENSE. *
11 * For the list of contributors see $ROOTSYS/README/CREDITS. *
12 *************************************************************************/
13
14#ifndef TMVA_RBATCHLOADER
15#define TMVA_RBATCHLOADER
16
#include <algorithm>
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <mutex>
#include <numeric>
#include <queue>
#include <span>
#include <vector>

#include "TMVA/RTensor.hxx"
#include "TMVA/Tools.h"
28
29namespace TMVA {
30namespace Experimental {
31namespace Internal {
32
34private:
36 std::size_t fBatchSize;
37 std::size_t fNumColumns;
38 std::size_t fMaxBatches;
39 std::size_t fTrainingRemainderRow = 0;
40 std::size_t fValidationRemainderRow = 0;
41
42 bool fIsActive = false;
43
44 std::mutex fBatchLock;
45 std::condition_variable fBatchCondition;
46
47 std::queue<std::unique_ptr<TMVA::Experimental::RTensor<float>>> fTrainingBatchQueue;
48 std::queue<std::unique_ptr<TMVA::Experimental::RTensor<float>>> fValidationBatchQueue;
49 std::unique_ptr<TMVA::Experimental::RTensor<float>> fCurrentBatch;
50
51 std::unique_ptr<TMVA::Experimental::RTensor<float>> fTrainingRemainder;
52 std::unique_ptr<TMVA::Experimental::RTensor<float>> fValidationRemainder;
53
54public:
55 RBatchLoader(const TMVA::Experimental::RTensor<float> &chunkTensor, const std::size_t batchSize,
56 const std::size_t numColumns, const std::size_t maxBatches)
58 {
59 // Create remainders tensors
61 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{fBatchSize - 1, fNumColumns});
63 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{fBatchSize - 1, fNumColumns});
64 }
65
67
68public:
69 /// \brief Return a batch of data as a unique pointer.
70 /// After the batch has been processed, it should be destroyed.
71 /// \return Training batch
73 {
74 std::unique_lock<std::mutex> lock(fBatchLock);
75 fBatchCondition.wait(lock, [this]() { return !fTrainingBatchQueue.empty() || !fIsActive; });
76
77 if (fTrainingBatchQueue.empty()) {
78 fCurrentBatch = std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>({0}));
79 return *fCurrentBatch;
80 }
81
82 fCurrentBatch = std::move(fTrainingBatchQueue.front());
84
85 fBatchCondition.notify_all();
86
87 return *fCurrentBatch;
88 }
89
90 /// \brief Returns a batch of data for validation
91 /// The owner of this batch has to be with the RBatchLoader.
92 /// This is because the same validation batches should be used in all epochs.
93 /// \return Validation batch
95 {
96 if (fValidationBatchQueue.empty()) {
97 fCurrentBatch = std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>({0}));
98 return *fCurrentBatch;
99 }
100
101 fCurrentBatch = std::move(fValidationBatchQueue.front());
103
104 return *fCurrentBatch;
105 }
106
107 /// \brief Activate the batchloader so it will accept chunks to batch
108 void Activate()
109 {
112
113 {
114 std::lock_guard<std::mutex> lock(fBatchLock);
115 fIsActive = true;
116 }
117 fBatchCondition.notify_all();
118 }
119
120 /// \brief DeActivate the batchloader. This means that no more batches are created.
121 /// Batches can still be returned if they are already loaded
123 {
124 {
125 std::lock_guard<std::mutex> lock(fBatchLock);
126 fIsActive = false;
127 }
128 fBatchCondition.notify_all();
129 }
130
131 std::unique_ptr<TMVA::Experimental::RTensor<float>>
133 std::size_t batchSize)
134 {
135 auto batch =
136 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>({batchSize, fNumColumns}));
137
138 for (std::size_t i = 0; i < batchSize; i++) {
139 std::copy(chunkTensor.GetData() + (idxs[i] * fNumColumns),
140 chunkTensor.GetData() + ((idxs[i] + 1) * fNumColumns), batch->GetData() + i * fNumColumns);
141 }
142
143 return batch;
144 }
145
146 std::unique_ptr<TMVA::Experimental::RTensor<float>>
148 std::span<const std::size_t> eventIndices)
149 {
150 auto batch =
151 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>({fBatchSize, fNumColumns}));
152
153 for (size_t i = 0; i < remainderTensorRow; i++) {
154 std::copy(remainderTensor.GetData() + i * fNumColumns, remainderTensor.GetData() + (i + 1) * fNumColumns,
155 batch->GetData() + i * fNumColumns);
156 }
157
158 for (std::size_t i = 0; i < (fBatchSize - remainderTensorRow); i++) {
159 std::copy(fChunkTensor.GetData() + eventIndices[i] * fNumColumns,
160 fChunkTensor.GetData() + (eventIndices[i] + 1) * fNumColumns,
161 batch->GetData() + (i + remainderTensorRow) * fNumColumns);
162 }
163
164 return batch;
165 }
166
167 /// @brief save to remaining data when the whole chunk has to be saved
168 /// @param chunkTensor
169 /// @param remainderTensor
170 /// @param remainderTensorRow
171 /// @param eventIndices
173 const std::vector<std::size_t> eventIndices, const std::size_t start = 0)
174 {
175 for (std::size_t i = start; i < eventIndices.size(); i++) {
176 std::copy(fChunkTensor.GetData() + eventIndices[i] * fNumColumns,
177 fChunkTensor.GetData() + (eventIndices[i] + 1) * fNumColumns,
178 remainderTensor.GetData() + (i - start + remainderTensorRow) * fNumColumns);
179 }
180 }
181
182 /// \brief Create training batches from the given chunk of data based on the given event indices
183 /// Batches are added to the training queue of batches
184 /// \param chunkTensor
185 /// \param eventIndices
186 void CreateTrainingBatches(const std::vector<std::size_t> &eventIndices)
187 {
188 // Wait until less than a full chunk of batches are in the queue before splitting the next chunk into
189 // batches
190 {
191 std::unique_lock<std::mutex> lock(fBatchLock);
192 fBatchCondition.wait(lock, [this]() { return (fTrainingBatchQueue.size() < fMaxBatches) || !fIsActive; });
193 if (!fIsActive)
194 return;
195 }
196
197 std::vector<std::unique_ptr<TMVA::Experimental::RTensor<float>>> batches;
198
201 } else {
204 return;
205 }
206
207 // Create tasks of fBatchSize until all idx are used
208 std::size_t start = fBatchSize - fTrainingRemainderRow;
209 for (; (start + fBatchSize) <= eventIndices.size(); start += fBatchSize) {
210 // Grab the first fBatchSize indices
211 std::span<const std::size_t> idxs{eventIndices.data() + start, eventIndices.data() + start + fBatchSize};
212
213 // Fill a batch
215 }
216
217 {
218 std::unique_lock<std::mutex> lock(fBatchLock);
219 for (std::size_t i = 0; i < batches.size(); i++) {
220 fTrainingBatchQueue.push(std::move(batches[i]));
221 }
222 }
223
224 fBatchCondition.notify_all();
225
226 fTrainingRemainderRow = eventIndices.size() - start;
228 }
229
230 /// \brief Create validation batches from the given chunk based on the given event indices
231 /// Batches are added to the vector of validation batches
232 /// \param chunkTensor
233 /// \param eventIndices
234 void CreateValidationBatches(const std::vector<std::size_t> &eventIndices)
235 {
238 } else {
241 return;
242 }
243
244 // Create tasks of fBatchSize untill all idx are used
245 std::size_t start = fBatchSize - fValidationRemainderRow;
246 for (; (start + fBatchSize) <= eventIndices.size(); start += fBatchSize) {
247
248 std::vector<std::size_t> idx;
249
250 for (std::size_t i = start; i < (start + fBatchSize); i++) {
251 idx.push_back(eventIndices[i]);
252 }
253
255 }
256
257 fValidationRemainderRow = eventIndices.size() - start;
259 }
260
262 {
263 {
265 std::vector<std::size_t> idx = std::vector<std::size_t>(fTrainingRemainderRow);
266 std::iota(idx.begin(), idx.end(), 0);
267
268 std::unique_ptr<TMVA::Experimental::RTensor<float>> batch =
270
271 std::unique_lock<std::mutex> lock(fBatchLock);
272 fTrainingBatchQueue.push(std::move(batch));
273 }
274 }
275
277 std::vector<std::size_t> idx = std::vector<std::size_t>(fValidationRemainderRow);
278 std::iota(idx.begin(), idx.end(), 0);
279
281 }
282 }
283};
284
285} // namespace Internal
286} // namespace Experimental
287} // namespace TMVA
288
289#endif // TMVA_RBATCHLOADER
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
void Activate()
Activate the batchloader so it will accept chunks to batch.
RBatchLoader(const TMVA::Experimental::RTensor< float > &chunkTensor, const std::size_t batchSize, const std::size_t numColumns, const std::size_t maxBatches)
std::unique_ptr< TMVA::Experimental::RTensor< float > > fTrainingRemainder
void CreateValidationBatches(const std::vector< std::size_t > &eventIndices)
Create validation batches from the given chunk based on the given event indices Batches are added to ...
std::unique_ptr< TMVA::Experimental::RTensor< float > > CreateFirstBatch(const TMVA::Experimental::RTensor< float > &remainderTensor, std::size_t remainderTensorRow, std::span< const std::size_t > eventIndices)
std::queue< std::unique_ptr< TMVA::Experimental::RTensor< float > > > fValidationBatchQueue
void CreateTrainingBatches(const std::vector< std::size_t > &eventIndices)
Create training batches from the given chunk of data based on the given event indices Batches are add...
const TMVA::Experimental::RTensor< float > & GetTrainBatch()
Return a batch of data as a unique pointer.
const TMVA::Experimental::RTensor< float > & fChunkTensor
std::unique_ptr< TMVA::Experimental::RTensor< float > > CreateBatch(const TMVA::Experimental::RTensor< float > &chunkTensor, std::span< const std::size_t > idxs, std::size_t batchSize)
std::unique_ptr< TMVA::Experimental::RTensor< float > > fCurrentBatch
void SaveRemainingData(TMVA::Experimental::RTensor< float > &remainderTensor, const std::size_t remainderTensorRow, const std::vector< std::size_t > eventIndices, const std::size_t start=0)
Save rows into the remainder tensor when the whole chunk has to be kept for later batching.
std::unique_ptr< TMVA::Experimental::RTensor< float > > fValidationRemainder
const TMVA::Experimental::RTensor< float > & GetValidationBatch()
Returns a batch of data for validation; ownership of the batch remains with the RBatchLoader.
void DeActivate()
DeActivate the batchloader.
std::queue< std::unique_ptr< TMVA::Experimental::RTensor< float > > > fTrainingBatchQueue
RTensor is a container with contiguous memory and shape information.
Definition RTensor.hxx:163
create variable transformations