doc/master/TensorDataLoader_8h_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Vladimir Ilievski


/**********************************************************************************

 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *

 * Package: TMVA                                                                  *

 * Class  : TTensorDataLoader                                                     *

 *                                             *

 *                                                                                *

 * Description:                                                                   *

 *      Tensor Data Loader Class                                                  *

 *                                                                                *

 * Authors (alphabetical):                                                        *

 *      Vladimir Ilievski      <ilievski.vladimir@live.com>  - CERN, Switzerland  *

 *                                                                                *

 * Copyright (c) 2005-2015:                                                       *

 *      CERN, Switzerland                                                         *

 *      U. of Victoria, Canada                                                    *

 *      MPI-K Heidelberg, Germany                                                 *

 *      U. of Bonn, Germany                                                       *

 *                                                                                *

 * Redistribution and use in source and binary forms, with or without             *

 * modification, are permitted according to the terms listed in LICENSE           *

 * (see tmva/doc/LICENSE)                                          *

 **********************************************************************************/


#ifndef TMVA_DNN_TENSORDATALOADER

#define TMVA_DNN_TENSORDATALOADER


#include "TMatrix.h"
#include "TMVA/Event.h"

#include <algorithm>
#include <cassert>
#include <tuple>
#include <utility>
#include <vector>


namespace TMVA {

   class DataSetInfo;

namespace DNN {


//

// Input Data Types

//______________________________________________________________________________

/// Tensor-style input: a tuple of const references to a vector of matrices and
/// two further matrices (presumably inputs, outputs and weights, in that order —
/// confirm against the CopyTensor* specializations).
using TensorInput = std::tuple<const std::vector<TMatrixT<Double_t>> &,
                               const TMatrixT<Double_t> &,
                               const TMatrixT<Double_t> &>;

/// TMVA-style input: const references to a vector of Event pointers and a DataSetInfo.
using TMVAInput_t = std::tuple<const std::vector<Event *> &, const DataSetInfo &>;

/// Iterator into a std::vector<size_t>, used to walk the sample indices.
using IndexIterator_t = std::vector<size_t>::iterator;


/** TTensorBatch
 *
 * A single training batch: an input tensor together with a matrix of expected
 * outputs and a matrix of per-event weights. The three parts are exposed
 * through GetInput(), GetOutput() and GetWeights().
 *
 * \tparam Architecture_t The underlying architecture; it supplies the
 *                        Matrix_t and Tensor_t types held by the batch.
 */
template <typename Architecture_t>
class TTensorBatch {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

   /** Build a batch from an input tensor, an output matrix and a weight matrix
    *  (in that order). */
   TTensorBatch(Tensor_t &, Matrix_t &, Matrix_t &);

   TTensorBatch(const TTensorBatch &) = default;
   TTensorBatch(TTensorBatch &&) = default;
   TTensorBatch &operator=(const TTensorBatch &) = default;
   TTensorBatch &operator=(TTensorBatch &&) = default;

   /** Access the tensor holding the input data of the batch. */
   Tensor_t &GetInput()
   {
      return fInputTensor;
   }

   /** Access the matrix holding the output data of the batch. */
   Matrix_t &GetOutput()
   {
      return fOutputMatrix;
   }

   /** Access the matrix holding the event weights of the batch. */
   Matrix_t &GetWeights()
   {
      return fWeightMatrix;
   }

private:
   Tensor_t fInputTensor;  ///< Input tensor of the batch.
   Matrix_t fOutputMatrix; ///< Output matrix representing the ground truth.
   Matrix_t fWeightMatrix; ///< Event/example weights.
};


template <typename Data_t, typename Architecture_t>

class TTensorDataLoader;


/** TTensorBatchIterator
 *
 * Minimal iterator over the training batches of a data set represented by a
 * TTensorDataLoader object. The iterator only stores a batch counter used for
 * comparison; the batch itself is produced by the loader on dereference.
 *
 * \tparam Data_t         The input data type.
 * \tparam Architecture_t The underlying architecture type.
 */
template <typename Data_t, typename Architecture_t>
class TTensorBatchIterator {
private:
   TTensorDataLoader<Data_t, Architecture_t> &fTensorDataLoader; ///< Loader that produces the batches.
   size_t fBatchIndex;                                           ///< Position of this iterator, used for comparison.

public:
   /** Create an iterator bound to \p tensorDataLoader positioned at batch \p index. */
   TTensorBatchIterator(TTensorDataLoader<Data_t, Architecture_t> &tensorDataLoader, size_t index = 0)
      : fTensorDataLoader(tensorDataLoader), fBatchIndex(index)
   {
      // Nothing to do here.
   }

   /** Return the loader's next batch. The call is forwarded to
    *  TTensorDataLoader::GetTensorBatch(); fBatchIndex is not passed along. */
   TTensorBatch<Architecture_t> operator*() { return fTensorDataLoader.GetTensorBatch(); }

   /** Pre-increment. Returns a reference to this iterator so that advancing
    *  does not create a copy. */
   TTensorBatchIterator &operator++()
   {
      ++fBatchIndex;
      return *this;
   }

   /** Two iterators differ when their batch counters differ. Const-qualified so
    *  that const iterators can be compared as well. */
   bool operator!=(const TTensorBatchIterator &other) const { return fBatchIndex != other.fBatchIndex; }
};


/** TTensorDataLoader
 *
 * Service class that streams training data from the input data type into
 * batches for the given architecture. The loader owns fNStreams pairs of host
 * and device buffers; GetTensorBatch() selects the pair for a batch as
 * (batch index modulo fNStreams).
 *
 * begin() and end() span fNSamples / fBatchSize batches. The division is an
 * integer division, so samples beyond the last complete batch are not covered
 * by the iteration.
 *
 * \tparam Data_t         The input data type.
 * \tparam Architecture_t The architecture class of the underlying architecture.
 */
template <typename Data_t, typename Architecture_t>
class TTensorDataLoader {
private:
   using HostBuffer_t = typename Architecture_t::HostBuffer_t;
   using DeviceBuffer_t = typename Architecture_t::DeviceBuffer_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;
   using Shape_t = typename Architecture_t::Tensor_t::Shape_t;
   using BatchIterator_t = TTensorBatchIterator<Data_t, Architecture_t>;

public:
   /*! Constructor. */
   TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, const Shape_t &inputLayout,
                     const Shape_t &batchLayout, size_t nOutputFeatures, size_t nStreams = 1);

   // NOTE(review): because of the reference member fData, the defaulted copy-
   // and move-assignment operators below are implicitly defined as deleted.
   TTensorDataLoader(const TTensorDataLoader &) = default;
   TTensorDataLoader(TTensorDataLoader &&) = default;
   TTensorDataLoader &operator=(const TTensorDataLoader &) = default;
   TTensorDataLoader &operator=(TTensorDataLoader &&) = default;

   /** Copy the input tensor into the given host buffer. To be specialized by
    *  the architecture-specific backend. */
   void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin);

   /** Copy the output matrix into the given host buffer. To be specialized by
    *  the architecture-specific backend. */
   void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin);

   /** Copy the weight matrix into the given host buffer. To be specialized by
    *  the architecture-specific backend. */
   void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin);

   /** Iterator positioned at batch 0. */
   BatchIterator_t begin()
   {
      return BatchIterator_t(*this);
   }

   /** Iterator positioned one past the last complete batch (fNSamples / fBatchSize). */
   BatchIterator_t end()
   {
      return BatchIterator_t(*this, fNSamples / fBatchSize);
   }

   /** Shuffle the order of the samples. The shuffling is indirect: only the
    *  sample indices are permuted, no input data is moved by this routine. */
   template <typename RNG>
   void Shuffle(RNG &rng);

   /** Return the next batch from the training set. The loader keeps an
    *  internal counter that cycles over the batches in the training set. */
   TTensorBatch<Architecture_t> GetTensorBatch();

private:
   const Data_t &fData;     ///< The data that should be loaded in the batches.
   size_t fNSamples;        ///< The total number of samples in the dataset.
   size_t fBatchSize;       ///< The size of a batch.
   Shape_t fInputLayout;    ///< The input data layout (does not include batch size).
   size_t fBatchDepth;      ///< The number of matrices in the tensor.
   size_t fBatchHeight;     ///< The number of rows in each matrix.
   size_t fBatchWidth;      ///< The number of columns in each matrix.
   size_t fNOutputFeatures; ///< The number of outputs from the classifier/regressor.
   size_t fBatchIndex;      ///< Index of the next batch returned by GetTensorBatch().

   size_t fNStreams;                           ///< Number of buffer pairs.
   std::vector<DeviceBuffer_t> fDeviceBuffers; ///< Device buffers keeping the input, output and weight data.
   std::vector<HostBuffer_t> fHostBuffers;     ///< Host buffers used to load the input, output and weight data.

   std::vector<size_t> fSampleIndices; ///< Ordering of the samples in the epoch.
};


//

// TTensorBatch Class.

//______________________________________________________________________________

/** Copy-construct the three members of the batch from the given input tensor,
 *  output matrix and weight matrix. */
template <typename Architecture_t>
TTensorBatch<Architecture_t>::TTensorBatch(Tensor_t &inputTensor, Matrix_t &outputMatrix, Matrix_t &weightMatrix)
   : fInputTensor(inputTensor),
     fOutputMatrix(outputMatrix),
     fWeightMatrix(weightMatrix)
{
}


//

// TTensorDataLoader Class.

//______________________________________________________________________________

/** Set up a loader over \p data.
 *
 * \param data            Data to be loaded; only a reference is kept.
 * \param nSamples        Total number of samples in the data set.
 * \param batchSize       Number of samples per batch.
 * \param inputLayout     Input layout, stored as is.
 * \param batchLayout     Batch layout; its first three entries are taken as
 *                        batch depth, height and width.
 * \param nOutputFeatures Number of output features per sample.
 * \param nStreams        Number of host/device buffer pairs to allocate.
 */
template <typename Data_t, typename Architecture_t>
TTensorDataLoader<Data_t, Architecture_t>::TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize,
                                                             const Shape_t &inputLayout, const Shape_t &batchLayout,
                                                             size_t nOutputFeatures, size_t nStreams)
   : fData(data),
     fNSamples(nSamples),
     fBatchSize(batchSize),
     fInputLayout(inputLayout),
     fBatchDepth(batchLayout[0]),
     fBatchHeight(batchLayout[1]),
     fBatchWidth(batchLayout[2]),
     fNOutputFeatures(nOutputFeatures),
     fBatchIndex(0),
     fNStreams(nStreams),
     fDeviceBuffers(),
     fHostBuffers(),
     fSampleIndices()
{
   // Every buffer is sized to hold the input tensor, the output matrix and the
   // weight matrix of one batch.
   const size_t inputElements = fBatchDepth * fBatchHeight * fBatchWidth;
   const size_t outputElements = fBatchSize * fNOutputFeatures;
   const size_t weightElements = fBatchSize;
   const size_t bufferElements = inputElements + outputElements + weightElements;

   // One host/device buffer pair per stream.
   for (size_t stream = 0; stream != fNStreams; ++stream) {
      fHostBuffers.push_back(HostBuffer_t(bufferElements));
      fDeviceBuffers.push_back(DeviceBuffer_t(bufferElements));
   }

   // Start from the identity ordering 0, 1, ..., fNSamples - 1.
   fSampleIndices.reserve(fNSamples);
   size_t nextIndex = 0;
   while (nextIndex < fNSamples) {
      fSampleIndices.push_back(nextIndex);
      ++nextIndex;
   }
}


//______________________________________________________________________________

/** Assemble and return the next batch.
 *
 * The samples of the current batch are copied into the host buffer of the
 * selected stream, the host buffer is copied to its device buffer, and the
 * returned batch is built on top of the device sub-buffers. The internal batch
 * counter is advanced afterwards and wraps around after the last complete
 * batch.
 */
template <typename Data_t, typename Architecture_t>
TTensorBatch<Architecture_t> TTensorDataLoader<Data_t, Architecture_t>::GetTensorBatch()
{
   // Wrap the counter so that it always addresses one of the complete batches.
   // NOTE(review): nBatches is 0 when fNSamples < fBatchSize, which makes the
   // modulo undefined — confirm that callers guarantee at least one full batch.
   const size_t nBatches = fNSamples / fBatchSize;
   fBatchIndex = fBatchIndex % nBatches;

   // Element counts of the three parts and their offsets inside one buffer.
   const size_t inputSize = fBatchDepth * fBatchHeight * fBatchWidth;
   const size_t outputSize = fBatchSize * fNOutputFeatures;
   const size_t weightSize = fBatchSize;
   const size_t outputOffset = inputSize;
   const size_t weightOffset = inputSize + outputSize;

   // Buffer pairs are used in turn: batch index modulo number of streams.
   const size_t stream = fBatchIndex % fNStreams;
   HostBuffer_t &hostBuffer = fHostBuffers[stream];
   DeviceBuffer_t &deviceBuffer = fDeviceBuffers[stream];

   HostBuffer_t hostInput = hostBuffer.GetSubBuffer(0, inputSize);
   HostBuffer_t hostOutput = hostBuffer.GetSubBuffer(outputOffset, outputSize);
   HostBuffer_t hostWeights = hostBuffer.GetSubBuffer(weightOffset, weightSize);

   DeviceBuffer_t deviceInput = deviceBuffer.GetSubBuffer(0, inputSize);
   DeviceBuffer_t deviceOutput = deviceBuffer.GetSubBuffer(outputOffset, outputSize);
   DeviceBuffer_t deviceWeights = deviceBuffer.GetSubBuffer(weightOffset, weightSize);

   // The first sample of the batch is found by stepping in units of the batch
   // size. Per the original author's note, CopyTensorInput steps by the batch
   // depth instead (its specializations are not visible here), so two layouts
   // are supported: batch depth == 1 with batch height == batch size, or
   // batch depth == batch size.
   IndexIterator_t firstSample = fSampleIndices.begin() + fBatchIndex * fBatchSize;

   CopyTensorInput(hostInput, firstSample);
   CopyTensorOutput(hostOutput, firstSample);
   CopyTensorWeights(hostWeights, firstSample);

   // Transfer the filled host buffer to the device buffer of the same stream.
   deviceBuffer.CopyFrom(hostBuffer);

   assert(fInputLayout.size() == 3);
   Tensor_t inputTensor =
      Architecture_t::CreateTensor(deviceInput, fBatchSize, fInputLayout[0], fInputLayout[1], fInputLayout[2]);

   // In case of dense layers the input is replaced by a two-dimensional,
   // column-major tensor of shape {batch size, last input-layout entry}.
   const bool isDenseLayout =
      fBatchDepth == 1 && fBatchHeight == fBatchSize && fInputLayout[0] == 1 && fInputLayout[1] == 1;
   if (isDenseLayout) {
      inputTensor = Tensor_t(deviceInput, {fBatchSize, fInputLayout.back()}, Tensor_t::MemoryLayout::ColumnMajor);
   }

   Matrix_t outputMatrix(deviceOutput, fBatchSize, fNOutputFeatures);
   Matrix_t weightMatrix(deviceWeights, fBatchSize, 1);

   ++fBatchIndex;

   return TTensorBatch<Architecture_t>(inputTensor, outputMatrix, weightMatrix);
}


//______________________________________________________________________________

/** Permute the sample ordering with std::shuffle driven by \p rng. Only the
 *  index vector fSampleIndices is modified. */
template <typename Data_t, typename Architecture_t>
template <typename RNG>
void TTensorDataLoader<Data_t, Architecture_t>::Shuffle(RNG &rng)
{
   const auto first = fSampleIndices.begin();
   const auto last = fSampleIndices.end();
   std::shuffle(first, last, rng);
}


} // namespace DNN

} // namespace TMVA


#endif

Event.h

data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Definition TGWin32VirtualXProxy.cxx:104

index
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t index
Definition TGWin32VirtualXProxy.cxx:168

TMatrix.h

TMVA::DNN::TTensorBatchIterator
TTensorBatchIterator.
Definition TensorDataLoader.h:96

TMVA::DNN::TTensorBatchIterator::fBatchIndex
size_t fBatchIndex
Definition TensorDataLoader.h:99

TMVA::DNN::TTensorBatchIterator::TTensorBatchIterator
TTensorBatchIterator(TTensorDataLoader< Data_t, Architecture_t > &tensorDataLoader, size_t index=0)
Definition TensorDataLoader.h:102

TMVA::DNN::TTensorBatchIterator::operator*
TTensorBatch< Architecture_t > operator*()
Definition TensorDataLoader.h:108

TMVA::DNN::TTensorBatchIterator::operator!=
bool operator!=(const TTensorBatchIterator &other)
Definition TensorDataLoader.h:114

TMVA::DNN::TTensorBatchIterator::fTensorDataLoader
TTensorDataLoader< Data_t, Architecture_t > & fTensorDataLoader
Definition TensorDataLoader.h:98

TMVA::DNN::TTensorBatchIterator::operator++
TTensorBatchIterator operator++()
Definition TensorDataLoader.h:109

TMVA::DNN::TTensorBatch
TTensorBatch.
Definition TensorDataLoader.h:59

TMVA::DNN::TTensorBatch::operator=
TTensorBatch & operator=(const TTensorBatch &)=default

TMVA::DNN::TTensorBatch::fInputTensor
Tensor_t fInputTensor
The input tensor batch, one matrix one input.
Definition TensorDataLoader.h:65

TMVA::DNN::TTensorBatch::TTensorBatch
TTensorBatch(TTensorBatch &&)=default

TMVA::DNN::TTensorBatch::TTensorBatch
TTensorBatch(Tensor_t &, Matrix_t &, Matrix_t &)
Definition TensorDataLoader.h:198

TMVA::DNN::TTensorBatch::fWeightMatrix
Matrix_t fWeightMatrix
The event/example weights.
Definition TensorDataLoader.h:67

TMVA::DNN::TTensorBatch::operator=
TTensorBatch & operator=(TTensorBatch &&)=default

TMVA::DNN::TTensorBatch::Tensor_t
typename Architecture_t::Tensor_t Tensor_t
Definition TensorDataLoader.h:62

TMVA::DNN::TTensorBatch::GetWeights
Matrix_t & GetWeights()
Return the matrix holding the event weights.
Definition TensorDataLoader.h:81

TMVA::DNN::TTensorBatch::GetOutput
Matrix_t & GetOutput()
Return the matrix representing the output data.
Definition TensorDataLoader.h:79

TMVA::DNN::TTensorBatch::TTensorBatch
TTensorBatch(const TTensorBatch &)=default

TMVA::DNN::TTensorBatch::fOutputMatrix
Matrix_t fOutputMatrix
The output matrix representing the ground truth.
Definition TensorDataLoader.h:66

TMVA::DNN::TTensorBatch::GetInput
Tensor_t & GetInput()
Return the tensor representing the input data.
Definition TensorDataLoader.h:77

TMVA::DNN::TTensorBatch::Matrix_t
typename Architecture_t::Matrix_t Matrix_t
Definition TensorDataLoader.h:61

TMVA::DNN::TTensorDataLoader
TTensorDataLoader.
Definition TensorDataLoader.h:133

TMVA::DNN::TTensorDataLoader::Shape_t
typename Architecture_t::Tensor_t::Shape_t Shape_t
Definition TensorDataLoader.h:139

TMVA::DNN::TTensorDataLoader::TTensorDataLoader
TTensorDataLoader(const TTensorDataLoader &)=default

TMVA::DNN::TTensorDataLoader::Shuffle
void Shuffle(RNG &rng)
Shuffle the order of the samples in the batch.
Definition TensorDataLoader.h:285

TMVA::DNN::TTensorDataLoader::fNOutputFeatures
size_t fNOutputFeatures
The number of outputs from the classifier/regressor.
Definition TensorDataLoader.h:149

TMVA::DNN::TTensorDataLoader::fSampleIndices
std::vector< size_t > fSampleIndices
Ordering of the samples in the epoch.
Definition TensorDataLoader.h:157

TMVA::DNN::TTensorDataLoader::fDeviceBuffers
std::vector< DeviceBuffer_t > fDeviceBuffers
The device buffers used to keep the input, output and weight data.
Definition TensorDataLoader.h:154

TMVA::DNN::TTensorDataLoader::GetTensorBatch
TTensorBatch< Architecture_t > GetTensorBatch()
Return the next batch from the training set.
Definition TensorDataLoader.h:233

TMVA::DNN::TTensorDataLoader::operator=
TTensorDataLoader & operator=(const TTensorDataLoader &)=default

TMVA::DNN::TTensorDataLoader::DeviceBuffer_t
typename Architecture_t::DeviceBuffer_t DeviceBuffer_t
Definition TensorDataLoader.h:136

TMVA::DNN::TTensorDataLoader::fBatchWidth
size_t fBatchWidth
The number of columns in each matrix.
Definition TensorDataLoader.h:148

TMVA::DNN::TTensorDataLoader::CopyTensorOutput
void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin)
Copy output matrix into the given host buffer.

TMVA::DNN::TTensorDataLoader::fBatchIndex
size_t fBatchIndex
The index of the batch when there are multiple batches in parallel.
Definition TensorDataLoader.h:150

TMVA::DNN::TTensorDataLoader::fBatchHeight
size_t fBatchHeight
The number of rows in each matrix.
Definition TensorDataLoader.h:147

TMVA::DNN::TTensorDataLoader::fHostBuffers
std::vector< HostBuffer_t > fHostBuffers
The host buffers used to load the input, output and weight data.
Definition TensorDataLoader.h:155

TMVA::DNN::TTensorDataLoader::TTensorDataLoader
TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, const Shape_t &inputLayout, const Shape_t &batchLayout, size_t nOutputFeatures, size_t nStreams=1)
Constructor.
Definition TensorDataLoader.h:209

TMVA::DNN::TTensorDataLoader::begin
BatchIterator_t begin()
Definition TensorDataLoader.h:179

TMVA::DNN::TTensorDataLoader::TTensorDataLoader
TTensorDataLoader(TTensorDataLoader &&)=default

TMVA::DNN::TTensorDataLoader::fBatchSize
size_t fBatchSize
The size of a batch.
Definition TensorDataLoader.h:144

TMVA::DNN::TTensorDataLoader::fInputLayout
Shape_t fInputLayout
The input data layout (does not include batch size)
Definition TensorDataLoader.h:145

TMVA::DNN::TTensorDataLoader::CopyTensorWeights
void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin)
Copy weight matrix into the given host buffer.

TMVA::DNN::TTensorDataLoader::Matrix_t
typename Architecture_t::Matrix_t Matrix_t
Definition TensorDataLoader.h:137

TMVA::DNN::TTensorDataLoader::HostBuffer_t
typename Architecture_t::HostBuffer_t HostBuffer_t
Definition TensorDataLoader.h:135

TMVA::DNN::TTensorDataLoader::fBatchDepth
size_t fBatchDepth
The number of matrices in the tensor.
Definition TensorDataLoader.h:146

TMVA::DNN::TTensorDataLoader::fNStreams
size_t fNStreams
Number of buffer pairs.
Definition TensorDataLoader.h:153

TMVA::DNN::TTensorDataLoader::fData
const Data_t & fData
The data that should be loaded in the batches.
Definition TensorDataLoader.h:142

TMVA::DNN::TTensorDataLoader::operator=
TTensorDataLoader & operator=(TTensorDataLoader &&)=default

TMVA::DNN::TTensorDataLoader::Tensor_t
typename Architecture_t::Tensor_t Tensor_t
Definition TensorDataLoader.h:138

TMVA::DNN::TTensorDataLoader::end
BatchIterator_t end()
Definition TensorDataLoader.h:180

TMVA::DNN::TTensorDataLoader::fNSamples
size_t fNSamples
The total number of samples in the dataset.
Definition TensorDataLoader.h:143

TMVA::DNN::TTensorDataLoader::CopyTensorInput
void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin)
Copy input tensor into the given host buffer.

TMVA::DataSetInfo
Class that contains all the data information.
Definition DataSetInfo.h:62

TMatrixT
TMatrixT.
Definition TMatrixT.h:40

TMVA::DNN::TensorInput
std::tuple< const std::vector< TMatrixT< Double_t > > &, const TMatrixT< Double_t > &, const TMatrixT< Double_t > & > TensorInput
Definition TensorDataLoader.h:44

TMVA::DNN::IndexIterator_t
typename std::vector< size_t >::iterator IndexIterator_t
Definition DataLoader.h:42

TMVA::DNN::TMVAInput_t
std::tuple< const std::vector< Event * > &, const DataSetInfo & > TMVAInput_t
Definition DataLoader.h:40

TMVA
create variable transformations
Definition GeneticMinimizer.h:22