15#ifndef TMVA_RCHUNKLOADER
16#define TMVA_RCHUNKLOADER
30namespace Experimental {
106template <
typename... Args>
139 const std::size_t blockSize,
const float validationSplit,
const std::vector<std::string> &
cols,
172 std::random_device
rd;
184 for (
size_t i = 0; i <
fTraining->NumberOfDifferentBlocks.size(); i++) {
188 for (
size_t i = 0; i <
fValidation->NumberOfDifferentBlocks.size(); i++) {
193 std::vector<Long_t> indices(
BlockSizes.size());
195 for (
int i = 0; i < indices.size(); ++i) {
201 std::shuffle(indices.begin(), indices.end(),
g);
219 std::vector<std::pair<Long_t, Long_t>> BlockIntervals;
227 for (
int i = 0; i < BlockIntervals.size(); ++i) {
248 std::random_device
rd;
259 std::shuffle(
fTraining->FullBlockIntervalsInFullChunks.begin(),
260 fTraining->FullBlockIntervalsInFullChunks.end(),
g);
261 std::shuffle(
fTraining->LeftoverBlockIntervalsInFullChunks.begin(),
262 fTraining->LeftoverBlockIntervalsInFullChunks.end(),
g);
263 std::shuffle(
fTraining->FullBlockIntervalsInLeftoverChunks.begin(),
264 fTraining->FullBlockIntervalsInLeftoverChunks.end(),
g);
265 std::shuffle(
fTraining->LeftoverBlockIntervalsInLeftoverChunks.begin(),
266 fTraining->LeftoverBlockIntervalsInLeftoverChunks.end(),
g);
287 std::random_device
rd;
297 std::shuffle(
fValidation->FullBlockIntervalsInFullChunks.begin(),
299 std::shuffle(
fValidation->LeftoverBlockIntervalsInFullChunks.begin(),
300 fValidation->LeftoverBlockIntervalsInFullChunks.end(),
g);
301 std::shuffle(
fValidation->FullBlockIntervalsInLeftoverChunks.begin(),
302 fValidation->FullBlockIntervalsInLeftoverChunks.end(),
g);
303 std::shuffle(
fValidation->LeftoverBlockIntervalsInLeftoverChunks.begin(),
304 fValidation->LeftoverBlockIntervalsInLeftoverChunks.end(),
g);
326 std::random_device
rd;
343 std::iota(indices.begin(), indices.end(), 0);
347 std::shuffle(indices.begin(), indices.end(),
g);
355 [](
const std::pair<Long_t, Long_t>&
a,
const std::pair<Long_t, Long_t>&
b) {
356 return a.first < b.first;
373 for (std::size_t
j = 0;
j < blockSize;
j++) {
384 for (std::size_t i = 0; i <
chunkSize; i++) {
399 std::random_device
rd;
416 std::iota(indices.begin(), indices.end(), 0);
420 std::shuffle(indices.begin(), indices.end(),
g);
427 [](
const std::pair<Long_t, Long_t>&
a,
const std::pair<Long_t, Long_t>&
b) {
428 return a.first < b.first;
444 for (std::size_t
j = 0;
j < blockSize;
j++) {
456 for (std::size_t i = 0; i <
chunkSize; i++) {
482 std::cout <<
"Tensor consists of only unique elements" << std::endl;
515 std::cout <<
"No overlap between the tensors" << std::endl;
517 std::cout <<
"Intersection between tensors: ";
519 std::cout << num <<
" ";
521 std::cout << std::endl;
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
The public interface to the RDataFrame federation of classes.
void Foreach(F f, const ColumnNames_t &columns={})
Execute a user-defined function on each entry (instant action).
Smart pointer for the return type of actions.
const_iterator begin() const
const_iterator end() const
std::size_t fNumChunkCols
void AssignToTensor(const T &vec, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of vectors.
std::vector< std::size_t > fMaxVecSizes
void AssignToTensor(const T &val, int i, int numColumns)
Copy the content of a column into RTensor when the column consists of single values.
void operator()(const ColTypes &...cols)
RChunkLoaderFunctor(TMVA::Experimental::RTensor< float > &chunkTensor, std::size_t numColumns, const std::vector< std::size_t > &maxVecSizes, float vecPadding, int i)
TMVA::Experimental::RTensor< float > & fChunkTensor
std::size_t GetNumValidationEntries()
RChunkLoader(ROOT::RDF::RNode &rdf, std::size_t numEntries, ROOT::RDF::RResultPtr< std::vector< ULong64_t > > rdf_entries, const std::size_t chunkSize, const std::size_t blockSize, const float validationSplit, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0, bool shuffle=true, const std::size_t setSeed=0)
void LoadTrainingChunk(TMVA::Experimental::RTensor< float > &TrainChunkTensor, std::size_t chunk)
Load the nth chunk from the training dataset into a tensor.
std::unique_ptr< RChunkConstructor > fValidation
std::vector< std::string > fCols
std::size_t GetNumTrainingEntries()
std::size_t GetNumValidationChunks()
std::vector< std::size_t > GetTrainingChunkSizes()
void CheckIfOverlap(TMVA::Experimental::RTensor< float > &Tensor1, TMVA::Experimental::RTensor< float > &Tensor2)
ROOT::RDF::RResultPtr< std::vector< ULong64_t > > fEntries
void CheckIfUnique(TMVA::Experimental::RTensor< float > &Tensor)
std::vector< std::size_t > GetValidationChunkSizes()
void SplitDataset()
Distribute the blocks into training and validation datasets.
void CreateValidationChunksIntervals()
Create validation chunks consisting of block intervals of different types.
void CreateTrainingChunksIntervals()
Create training chunks consisting of block intervals of different types.
std::vector< std::size_t > fVecSizes
std::size_t fNumTrainEntries
std::size_t fNumChunkCols
std::unique_ptr< RChunkConstructor > fTraining
std::size_t fNumValidationEntries
void LoadValidationChunk(TMVA::Experimental::RTensor< float > &ValidationChunkTensor, std::size_t chunk)
Load the nth chunk from the validation dataset into a tensor.
std::size_t GetNumTrainingChunks()
RTensor is a container with contiguous memory and shape information.
void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end)
create variable transformations