template <typename AFloat>
template <typename RNNLayer>
void TCudnn<AFloat>::InitializeRecurrentTensors(RNNLayer *layer)
{
   // ... (timeSteps is 1 when the layer returns only the last time step)
   layer->GetOutput() =
      Tensor_t(layer->GetOutput().GetDeviceBuffer(),
               {layer->GetBatchSize(), timeSteps, layer->GetStateSize()}, GetTensorLayout());
   layer->GetActivationGradients() =
      Tensor_t(layer->GetActivationGradients().GetDeviceBuffer(),
               {layer->GetBatchSize(), timeSteps, layer->GetStateSize()}, GetTensorLayout());
   // ...
   for (size_t i = 0; i < layer->GetWeights().size(); ++i) {
      auto &w = layer->GetWeightsAt(i);
      // ...
      w = Tensor_t(layer->GetWeightsAt(i).GetDeviceBuffer(),
                   {layer->GetWeightsAt(i).GetNrows(), layer->GetWeightsAt(i).GetNcols()},
                   GetTensorLayout(), 0, 0);
   }
   // ...
   for (size_t i = 0; i < layer->GetBiases().size(); ++i) {
      // ...
      auto &b = layer->GetBiasesAt(i);
      b = Tensor_t(layer->GetBiasesAt(i).GetDeviceBuffer(), {layer->GetStateSize(), 1},
                   GetTensorLayout(), 0, 0);
   }
   // ...
   // The cuDNN input/output workspaces use the (time, batch, feature) ordering:
   layer->GetX() = Tensor_t({layer->GetTimeSteps(), layer->GetBatchSize(), layer->GetInputSize()}, GetTensorLayout());
   layer->GetY() = Tensor_t({layer->GetTimeSteps(), layer->GetBatchSize(), layer->GetStateSize()}, GetTensorLayout());
   // ...
   layer->GetDX() = Tensor_t({layer->GetTimeSteps(), layer->GetBatchSize(), layer->GetInputSize()}, GetTensorLayout());
   layer->GetDY() = Tensor_t({layer->GetTimeSteps(), layer->GetBatchSize(), layer->GetStateSize()}, GetTensorLayout());
}
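// Note on the pattern above (comment added for clarity, not original code):
// constructing a Tensor_t from an existing GetDeviceBuffer() creates a view
// with a new shape over the same GPU memory; nothing is copied. Conceptually:
//
//   auto buf = t.GetDeviceBuffer();                          // shared device buffer
//   Tensor_t view(buf, {batch, time, state}, GetTensorLayout());
//   // "view" and "t" now alias the same device memory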
template <typename AFloat>
template <typename RNNLayer>
void TCudnn<AFloat>::InitializeRecurrentDescriptors(TDescriptors *&descriptors, RNNLayer *layer)
{
   // ... (rnn_type defaults to the plain recurrent cell)
   if (std::is_same<RNNLayer, LSTMLayer_t>::value) rnn_type = kLSTM;
   if (std::is_same<RNNLayer, GRULayer_t>::value) rnn_type = kGRU;
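   // Sketch (illustration, not part of the original file): rnn_type is what
   // later selects the cuDNN cell mode when the RNN descriptor is configured,
   // conceptually:
   //
   //   cudnnRNNMode_t mode = CUDNN_RNN_TANH;   // plain recurrent cell
   //   if (rnn_type == kLSTM) mode = CUDNN_LSTM;
   //   if (rnn_type == kGRU)  mode = CUDNN_GRU;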
   // ...
   unsigned long long seed = GetRandomGenerator().GetSeed();
   // ...
   int inputSize = layer->GetInputSize();
   // ...
   if (layer->GetBiases().size() > 0) {
      // ...
   }
   // ...
   // From here on the setup branches between the cuDNN 7 and cuDNN 8 APIs; the
   // original guards each cuDNN 8 section with "#if (CUDNN_VERSION >= 8000)".
#if (CUDNN_VERSION >= 8000)
   // ... (cuDNN 8 descriptor setup)
#endif
   // ...
   // The whole parameter set lives in one flat buffer of dimW[0] elements:
   weightTensor = Tensor_t({(size_t)dimW[0], 1, 1}, GetTensorLayout(), 0, 0);
   // ... (further version-guarded sections elided)
#if (CUDNN_VERSION < 8000)
   // Re-point the per-gate weight and bias tensors into sub-buffers of the flat
   // cuDNN parameter buffer, so both sides share the same device memory.
   for (size_t i = 0; i < layer->GetWeights().size(); ++i) {
      auto &w = layer->GetWeightsAt(i);
      auto &dw = layer->GetWeightGradientsAt(i);
      // ... (offset of this gate inside the flat buffer; if cuDNN disagrees:)
         std::cerr << "Error - different offset for weight " << i << std::endl;
      w = Tensor_t(weightTensor.GetDeviceBuffer().GetSubBuffer(offset, w.GetSize()), w.GetShape(),
                   GetTensorLayout(), 0, 0);
      // ... (dw is re-pointed the same way into the gradient buffer)
   }
   // ...
   for (size_t i = 0; i < layer->GetBiases().size(); ++i) {
      auto &b = layer->GetBiasesAt(i);
      auto &db = layer->GetBiasGradientsAt(i);
      // ... (same offset check for the bias:)
         std::cerr << "Error - different offset for bias " << i << std::endl;
      // ...
      b = Tensor_t(weightTensor.GetDeviceBuffer().GetSubBuffer(offset, b.GetSize()),
                   b.GetShape(), GetTensorLayout(), 0, 0);
      // ... (db handled analogously)
   }
#endif
}
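// Why the sub-buffer gymnastics above: cuDNN keeps all gate weights and biases
// of the RNN in one flat parameter buffer (weightTensor). Re-pointing TMVA's
// per-gate tensors at GetSubBuffer(offset, size) makes the optimizer write the
// exact memory that the cuDNN forward/backward calls read, so no copy between
// the framework's and cuDNN's weight storage is ever needed.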
template <typename AFloat>
void TCudnn<AFloat>::ReleaseRNNDescriptors(TDescriptors *descriptors)
{
   auto &rnnDescriptors = static_cast<RNNDescriptors_t &>(*descriptors);
   // ...
#if (CUDNN_VERSION >= 8000)
   // ...
#endif
}
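// The elided body releases the cuDNN objects owned by rnnDescriptors; presumably
// the matching destroy calls, e.g. cudnnDestroyRNNDescriptor() and, in the
// cuDNN 8 block, cudnnDestroyRNNDataDescriptor() for the x/y data descriptors.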
template <typename AFloat>
template <typename RNNLayer>
void TCudnn<AFloat>::InitializeRecurrentWorkspace(TWorkspace *&workspace, TDescriptors *&descriptors, RNNLayer *layer)
{
   // ...
   stateTensor = Tensor_t(stateTensor.GetDeviceBuffer(),
                          {numLayers, layer->GetBatchSize(), layer->GetStateSize()},
                          GetTensorLayout(), 0, 0);
   // ...
   if (layer->GetCell().GetSize() > 0) { // the cell state exists only for LSTM
      // ...
      cellStateTensor = Tensor_t(cellStateTensor.GetDeviceBuffer(),
                                 {numLayers, layer->GetBatchSize(), layer->GetStateSize()},
                                 GetTensorLayout(), 0, 0);
      // ...
   }
   // ...
#if (CUDNN_VERSION >= 8000)
   // ...
#endif
   // ... (if allocating the forward workspace fails:)
      std::cerr << "Error allocating RNN workspace of size " << rnnWorkspace->ForwardWorkspaceSize
                << " - probably running out of memory on the GPU" << std::endl;
      std::cout << " layer input shape is { " << layer->GetBatchSize() << " , " << layer->GetTimeSteps() << " , "
                << layer->GetStateSize() << " } " << std::endl;
   // ... (if allocating the reserve space fails:)
      std::cerr << "Error allocating RNN reserved workspace of size " << rnnWorkspace->HelperWorkspaceSize
                << " - probably running out of memory on the GPU" << std::endl;
      std::cout << " layer input shape is { " << layer->GetBatchSize() << " , " << layer->GetTimeSteps() << " , "
                << layer->GetStateSize() << " } " << std::endl;
   // ...
}
template <typename AFloat>
void TCudnn<AFloat>::FreeRNNWorkspace(TWorkspace *workspace)
{
   if (!workspace) return;
   auto rnnWorkspace = static_cast<RNNWorkspace_t *>(workspace);
   // ...
}
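// Freeing the workspace mirrors the allocation above: the ForwardWorkspace and
// HelperWorkspace pointers were obtained with cudaMalloc, so (presumably, in the
// elided lines) they are released with cudaFree.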
template <typename AFloat>
void TCudnn<AFloat>::RNNForward(const Tensor_t &x, const Tensor_t &hx, const Tensor_t &cx, const Tensor_t &weights,
                                Tensor_t &y, Tensor_t &hy, Tensor_t &cy, const RNNDescriptors_t &desc,
                                RNNWorkspace_t &workspace, bool isTraining)
{
   // ...
#if (CUDNN_VERSION >= 8000)
   // cuDNN 8: one entry point covers both training and inference
   // ...
   size_t weightSpaceSize = weights.GetSize() * sizeof(float);
   // ...
   cudnnRNNForward(/* ... */,
                   desc.xDataDesc, x.GetDataPointer(), desc.yDataDesc, y.GetDataPointer(),
                   /* ... hidden and cell state arguments ... */,
                   (isLSTM) ? cy.GetDataPointer() : nullptr,
                   /* ... weight and work space arguments ... */,
                   workspace.HelperWorkspaceSize, workspace.HelperWorkspace);
#else
   // cuDNN 7: separate training and inference entry points
   if (isTraining) {
      cudnnRNNForwardTraining(/* ... */,
                              workspace.HelperWorkspace, workspace.HelperWorkspaceSize);
   } else {
      cudnnRNNForwardInference(/* ... */,
                               desc.WeightsDescriptor, weights.GetDataPointer(), desc.yDesc.data(), y.GetDataPointer(),
                               /* ... */,
                               (isLSTM) ? cy.GetDataPointer() : nullptr,
                               workspace.ForwardWorkspace, workspace.ForwardWorkspaceSize);
   }
#endif
}
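// Shape conventions at this call site (as set up in InitializeRecurrentTensors):
// x and y are {timeSteps, batchSize, inputSize/stateSize}, while hx/hy and, for
// LSTM, cx/cy are {numLayers, batchSize, stateSize}. Passing nullptr for the
// cell-state pointers on non-LSTM cells tells cuDNN to skip them.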
template <typename AFloat>
void TCudnn<AFloat>::RNNBackward(const Tensor_t &x, const Tensor_t &hx, const Tensor_t &cx, const Tensor_t &y,
                                 const Tensor_t &dy, const Tensor_t &dhy, const Tensor_t &dcy, const Tensor_t &weights,
                                 Tensor_t &dx, Tensor_t &dhx, Tensor_t &dcx, Tensor_t &dw, const RNNDescriptors_t &desc,
                                 RNNWorkspace_t &workspace)
{
   // ...
   int batchSize = x.GetShape()[1];
   // ...
#if (CUDNN_VERSION >= 8000)
   // ...
   size_t weightSpaceSize = weights.GetSize() * sizeof(float);
   // data gradients (cuDNN 8)
   cudnnRNNBackwardData_v8(/* ... */,
                           desc.yDataDesc, y.GetDataPointer(), dy.GetDataPointer(),
                           desc.xDataDesc, dx.GetDataPointer(),
                           /* ... */,
                           (isLSTM) ? cx.GetDataPointer() : nullptr, (isLSTM) ? dcy.GetDataPointer() : nullptr,
                           (isLSTM) ? dcx.GetDataPointer() : nullptr,
                           /* ... */,
                           workspace.ForwardWorkspaceSize, workspace.ForwardWorkspace,
                           workspace.HelperWorkspaceSize, workspace.HelperWorkspace);
   // weight gradients (cuDNN 8)
   cudnnRNNBackwardWeights_v8(/* ... */,
                              desc.xDataDesc, x.GetDataPointer(),
                              /* ... */,
                              workspace.ForwardWorkspaceSize, workspace.ForwardWorkspace,
                              workspace.HelperWorkspaceSize, workspace.HelperWorkspace);
#else
   // data gradients (cuDNN 7)
   cudnnRNNBackwardData(/* ... */,
                        (isLSTM) ? cx.GetDataPointer() : nullptr,
                        /* ... */,
                        (isLSTM) ? dcx.GetDataPointer() : nullptr,
                        workspace.ForwardWorkspace, workspace.ForwardWorkspaceSize,
                        workspace.HelperWorkspace, workspace.HelperWorkspaceSize);
   // weight gradients (cuDNN 7)
   cudnnRNNBackwardWeights(/* ... */,
                           desc.yDesc.data(), y.GetDataPointer(), workspace.ForwardWorkspace,
                           workspace.ForwardWorkspaceSize, desc.WeightsGradDescriptor, dw.GetDataPointer(),
                           workspace.HelperWorkspace, workspace.HelperWorkspaceSize);
#endif
}
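// Ordering matters here: both backward calls consume the reserve space
// (HelperWorkspace) filled by the training forward pass, and cuDNN requires the
// data gradients to be computed before the weight gradients. Note also that the
// cuDNN 7 cudnnRNNBackwardWeights accumulates into dw, so the gradient tensor
// has to be zeroed before the call.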
template <typename AFloat>
void TCudnn<AFloat>::Rearrange(Tensor_t &y, const Tensor_t &x)
{
   // ...
   TensorDescriptor_t d = tmp.GetTensorDescriptor();
   // ...
   for (int i = 0; i < xNdim; ++i) {
      // ... (copy/permute the dimension and stride arrays)
   }
   // ...
   // The actual device-side shuffle is a tensor transform into y's layout:
   cudnnTransformTensor(/* ... */,
                        y.GetTensorDescriptor(), y.GetDataPointer());
   // ...
}
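// --- Illustration only (not part of the original file) -----------------------
// A host-side sketch of the layout change Rearrange performs on the device:
// mapping {batch, time, state} indexing to {time, batch, state}. Names are
// hypothetical.
static void RearrangeHostSketch(float *dst, const float *src,
                                size_t batch, size_t time, size_t state)
{
   for (size_t b = 0; b < batch; ++b)
      for (size_t t = 0; t < time; ++t)
         for (size_t s = 0; s < state; ++s)
            dst[(t * batch + b) * state + s] = src[(b * time + t) * state + s];
}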