Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
ROOT::Experimental::Internal::ML::RChunkLoader< Args > Class Template Reference

template<typename... Args>
class ROOT::Experimental::Internal::ML::RChunkLoader< Args >

Building and loading the chunks from the blocks and chunks constructed in RChunkConstructor.

In this class the blocks are stitched together to form chunks that are loaded into memory. The blocks used to create each chunk come from different parts of the dataset. This is achieved by shuffling the blocks before distributing them into chunks. The purpose of this process is to reduce bias during machine learning training by ensuring that the data is well mixed. The dataset is also split into training and validation sets with the user-defined validation split fraction.

Definition at line 112 of file RChunkLoader.hxx.

Public Member Functions

 RChunkLoader (ROOT::RDF::RNode &rdf, const std::size_t chunkSize, const std::size_t blockSize, const float validationSplit, const std::vector< std::string > &cols, const std::vector< std::size_t > &vecSizes={}, const float vecPadding=0.0, bool shuffle=true, const std::size_t setSeed=0)
 
void CheckIfOverlap (RFlat2DMatrix &Tensor1, RFlat2DMatrix &Tensor2)
 
void CheckIfUnique (RFlat2DMatrix &Tensor)
 
void CreateTrainingChunksIntervals ()
 Create training chunks consisting of block intervals of different types.
 
void CreateValidationChunksIntervals ()
 Create validation chunks consisting of block intervals of different types.
 
std::size_t GetNumTrainingChunks ()
 
std::size_t GetNumTrainingEntries ()
 
std::size_t GetNumValidationChunks ()
 
std::size_t GetNumValidationEntries ()
 
std::vector< std::size_t > GetTrainingChunkSizes ()
 
std::vector< std::size_t > GetValidationChunkSizes ()
 
void LoadTrainingChunk (RFlat2DMatrix &TrainChunkTensor, std::size_t chunk)
 Load the nth chunk from the training dataset into a tensor.
 
void LoadValidationChunk (RFlat2DMatrix &ValidationChunkTensor, std::size_t chunk)
 Load the nth chunk from the validation dataset into a tensor.
 
void ResetDataframe ()
 
void SplitDataset ()
 Distribute the blocks into training and validation datasets.
 

Private Attributes

ROOT::RDF::RNode & f_rdf
 
std::size_t fBlockSize
 
std::size_t fChunkSize
 
std::vector< std::string > fCols
 
ROOT::RDF::RResultPtr< std::vector< ULong64_t > > fEntries
 
bool fNotFiltered
 
std::size_t fNumChunkCols
 
std::size_t fNumCols
 
std::size_t fNumEntries
 
std::size_t fNumTrainEntries
 
std::size_t fNumValidationEntries
 
std::size_t fSetSeed
 
bool fShuffle
 
std::size_t fSumVecSizes
 
std::unique_ptr< RFlat2DMatrixOperators > fTensorOperators
 
std::unique_ptr< RChunkConstructor > fTraining
 
std::unique_ptr< RChunkConstructor > fValidation
 
float fValidationSplit
 
std::size_t fVecPadding
 
std::vector< std::size_t > fVecSizes
 

#include <ROOT/ML/RChunkLoader.hxx>

Constructor & Destructor Documentation

◆ RChunkLoader()

template<typename... Args>
ROOT::Experimental::Internal::ML::RChunkLoader< Args >::RChunkLoader ( ROOT::RDF::RNode & rdf,
const std::size_t chunkSize,
const std::size_t blockSize,
const float validationSplit,
const std::vector< std::string > & cols,
const std::vector< std::size_t > & vecSizes = {},
const float vecPadding = 0.0,
bool shuffle = true,
const std::size_t setSeed = 0 )
inline

Definition at line 142 of file RChunkLoader.hxx.

Member Function Documentation

◆ CheckIfOverlap()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::CheckIfOverlap ( RFlat2DMatrix & Tensor1,
RFlat2DMatrix & Tensor2 )
inline

Definition at line 448 of file RChunkLoader.hxx.

◆ CheckIfUnique()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::CheckIfUnique ( RFlat2DMatrix & Tensor)
inline

Definition at line 440 of file RChunkLoader.hxx.

◆ CreateTrainingChunksIntervals()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::CreateTrainingChunksIntervals ( )
inline

Create training chunks consisting of block intervals of different types.

Definition at line 255 of file RChunkLoader.hxx.

◆ CreateValidationChunksIntervals()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::CreateValidationChunksIntervals ( )
inline

Create validation chunks consisting of block intervals of different types.

Definition at line 295 of file RChunkLoader.hxx.

◆ GetNumTrainingChunks()

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetNumTrainingChunks ( )
inline

Definition at line 471 of file RChunkLoader.hxx.

◆ GetNumTrainingEntries()

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetNumTrainingEntries ( )
inline

Definition at line 437 of file RChunkLoader.hxx.

◆ GetNumValidationChunks()

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetNumValidationChunks ( )
inline

Definition at line 473 of file RChunkLoader.hxx.

◆ GetNumValidationEntries()

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetNumValidationEntries ( )
inline

Definition at line 438 of file RChunkLoader.hxx.

◆ GetTrainingChunkSizes()

template<typename... Args>
std::vector< std::size_t > ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetTrainingChunkSizes ( )
inline

Definition at line 434 of file RChunkLoader.hxx.

◆ GetValidationChunkSizes()

template<typename... Args>
std::vector< std::size_t > ROOT::Experimental::Internal::ML::RChunkLoader< Args >::GetValidationChunkSizes ( )
inline

Definition at line 435 of file RChunkLoader.hxx.

◆ LoadTrainingChunk()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::LoadTrainingChunk ( RFlat2DMatrix & TrainChunkTensor,
std::size_t chunk )
inline

Load the nth chunk from the training dataset into a tensor.

Parameters
[in] TrainChunkTensor: RTensor for the training chunk
[in] chunk: Index of the chunk in the dataset

Definition at line 333 of file RChunkLoader.hxx.

◆ LoadValidationChunk()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::LoadValidationChunk ( RFlat2DMatrix & ValidationChunkTensor,
std::size_t chunk )
inline

Load the nth chunk from the validation dataset into a tensor.

Parameters
[in] ValidationChunkTensor: RTensor for the validation chunk
[in] chunk: Index of the chunk in the dataset

Definition at line 385 of file RChunkLoader.hxx.

◆ ResetDataframe()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::ResetDataframe ( )
inline

Definition at line 432 of file RChunkLoader.hxx.

◆ SplitDataset()

template<typename... Args>
void ROOT::Experimental::Internal::ML::RChunkLoader< Args >::SplitDataset ( )
inline

Distribute the blocks into training and validation datasets.

Definition at line 180 of file RChunkLoader.hxx.

Member Data Documentation

◆ f_rdf

template<typename... Args>
ROOT::RDF::RNode& ROOT::Experimental::Internal::ML::RChunkLoader< Args >::f_rdf
private

Definition at line 128 of file RChunkLoader.hxx.

◆ fBlockSize

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fBlockSize
private

Definition at line 116 of file RChunkLoader.hxx.

◆ fChunkSize

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fChunkSize
private

Definition at line 115 of file RChunkLoader.hxx.

◆ fCols

template<typename... Args>
std::vector<std::string> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fCols
private

Definition at line 129 of file RChunkLoader.hxx.

◆ fEntries

template<typename... Args>
ROOT::RDF::RResultPtr<std::vector<ULong64_t> > ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fEntries
private

Definition at line 136 of file RChunkLoader.hxx.

◆ fNotFiltered

template<typename... Args>
bool ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNotFiltered
private

Definition at line 133 of file RChunkLoader.hxx.

◆ fNumChunkCols

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumChunkCols
private

Definition at line 122 of file RChunkLoader.hxx.

◆ fNumCols

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumCols
private

Definition at line 130 of file RChunkLoader.hxx.

◆ fNumEntries

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumEntries
private

Definition at line 114 of file RChunkLoader.hxx.

◆ fNumTrainEntries

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumTrainEntries
private

Definition at line 124 of file RChunkLoader.hxx.

◆ fNumValidationEntries

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fNumValidationEntries
private

Definition at line 125 of file RChunkLoader.hxx.

◆ fSetSeed

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fSetSeed
private

Definition at line 131 of file RChunkLoader.hxx.

◆ fShuffle

template<typename... Args>
bool ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fShuffle
private

Definition at line 134 of file RChunkLoader.hxx.

◆ fSumVecSizes

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fSumVecSizes
private

Definition at line 120 of file RChunkLoader.hxx.

◆ fTensorOperators

template<typename... Args>
std::unique_ptr<RFlat2DMatrixOperators> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fTensorOperators
private

Definition at line 126 of file RChunkLoader.hxx.

◆ fTraining

template<typename... Args>
std::unique_ptr<RChunkConstructor> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fTraining
private

Definition at line 138 of file RChunkLoader.hxx.

◆ fValidation

template<typename... Args>
std::unique_ptr<RChunkConstructor> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fValidation
private

Definition at line 139 of file RChunkLoader.hxx.

◆ fValidationSplit

template<typename... Args>
float ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fValidationSplit
private

Definition at line 117 of file RChunkLoader.hxx.

◆ fVecPadding

template<typename... Args>
std::size_t ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fVecPadding
private

Definition at line 121 of file RChunkLoader.hxx.

◆ fVecSizes

template<typename... Args>
std::vector<std::size_t> ROOT::Experimental::Internal::ML::RChunkLoader< Args >::fVecSizes
private

Definition at line 119 of file RChunkLoader.hxx.

  • tree/ml/inc/ROOT/ML/RChunkLoader.hxx