ROOT Reference Guide
GeneralLayer.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : TGeneralLayer                                                         *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      General Deep Neural Network Layer                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland        *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef TMVA_DNN_GENERALLAYER
#define TMVA_DNN_GENERALLAYER

#include <iostream>
#include <limits>

// for xml
#include "TMVA/Tools.h"
#include "TError.h" // for R__ASSERT

#include "TMVA/DNN/Functions.h"

namespace TMVA {
namespace DNN {

/** \class VGeneralLayer
    Generic General Layer class.

    This class represents the general class for all layers in the Deep Learning
    Module.
 */
template <typename Architecture_t>
class VGeneralLayer {

   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

protected:
   size_t fBatchSize; ///< Batch size used for training and evaluation

   size_t fInputDepth;  ///< The depth of the previous layer or input.
   size_t fInputHeight; ///< The height of the previous layer or input.
   size_t fInputWidth;  ///< The width of the previous layer or input.

   size_t fDepth;  ///< The depth of the layer.
   size_t fHeight; ///< The height of the layer.
   size_t fWidth;  ///< The width of this layer.

   bool fIsTraining; ///< Flag indicating whether the layer is in training mode.

   std::vector<Matrix_t> fWeights; ///< The weights associated with the layer.
   std::vector<Matrix_t> fBiases;  ///< The biases associated with the layer.

   std::vector<Matrix_t> fWeightGradients; ///< Gradients w.r.t. the weights of the layer.
   std::vector<Matrix_t> fBiasGradients;   ///< Gradients w.r.t. the bias values of the layer.

   Tensor_t fOutput;              ///< Activations of this layer.
   Tensor_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.

   EInitialization fInit; ///< The initialization method.

public:
   /*! Constructor */
   VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
                 size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols,
                 size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows,
                 size_t OutputNCols, EInitialization Init);

   /*! General constructor with different weights dimensions */
   VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
                 size_t Height, size_t Width, size_t WeightsNSlices, std::vector<size_t> WeightsNRows,
                 std::vector<size_t> WeightsNCols, size_t BiasesNSlices, std::vector<size_t> BiasesNRows,
                 std::vector<size_t> BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols,
                 EInitialization Init);

   /*! Copy the layer provided as a pointer */
   VGeneralLayer(VGeneralLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   VGeneralLayer(const VGeneralLayer &);

   /*! Virtual Destructor. */
   virtual ~VGeneralLayer();

   /*! Initialize the weights and biases according to the given initialization method. */
   virtual void Initialize();

   /*! Computes activation of the layer for the given input. The input
    * must be in 3D tensor form with the different matrices corresponding to
    * different events in the batch. */
   virtual void Forward(Tensor_t &input, bool applyDropout = false) = 0;

   /*! Backpropagates the error. Must only be called directly after the corresponding
    * call to Forward(...). */
   virtual void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) = 0;
   /////              std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2) = 0;
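
   /* Usage sketch (illustrative only, not part of the interface): a concrete
    * layer processes one batch per Forward/Backward pair. Assuming `layer` is a
    * fully constructed concrete layer and `input` holds one batch:
    *
    *    layer.Forward(input, layer.IsTraining());   // fills GetOutput()
    *    // ... evaluate the loss and write its gradient into the layer's
    *    //     activation gradients ...
    *    layer.Backward(gradients_backward, input);  // fills weight/bias gradients
    */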

   /*! Reset some training flags after a loop over all batches.
       Some layers (e.g. batch normalization) might need to implement this function
       in case some operations are needed after looping over all batches. */
   virtual void ResetTraining() {}

   /*! Updates the weights and biases, given the learning rate */
   void Update(const Scalar_t learningRate);

   /*! Updates the weights, given the gradients and the learning rate. */
   void UpdateWeights(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);

   /*! Updates the biases, given the gradients and the learning rate. */
   void UpdateBiases(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);

   /*! Updates the weight gradients, given some other weight gradients and the learning rate. */
   void UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);

   /*! Updates the bias gradients, given some other bias gradients and the learning rate. */
   void UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);
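
   /* A minimal sketch of how these hooks compose into plain gradient descent
    * (assuming Backward(...) has already filled the gradient matrices):
    *
    *    layer.Update(0.01);  // for every slice i, performs:
    *                         //    weights[i] += -0.01 * weightGradients[i]
    *                         //    biases[i]  += -0.01 * biasGradients[i]
    */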

   /*! Copies the weights provided as an input. */
   void CopyWeights(const std::vector<Matrix_t> &otherWeights);

   /*! Copies the biases provided as an input. */
   void CopyBiases(const std::vector<Matrix_t> &otherBiases);

   /*! Copy all trainable weights and biases from another equivalent layer with a
       different architecture. The function also copies any extra parameters, in
       addition to weights and biases, that are returned by GetExtraLayerParameters(). */
   template <typename Arch>
   void CopyParameters(const VGeneralLayer<Arch> &layer);
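
   /* Illustrative use (assumes two equivalent layers built on different
    * architectures, e.g. mirroring a GPU-trained layer into a CPU layer):
    *
    *    // VGeneralLayer<TCpu<Float_t>> cpuLayer = ...;
    *    // cpuLayer.CopyParameters(gpuLayer);  // weights, biases, extra params
    */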

   /*! Prints the info about the layer. */
   virtual void Print() const = 0;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent) = 0;

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent) = 0;

   /*! Set dropout probability. Reimplemented for layers supporting dropout. */
   virtual void SetDropoutProbability(Scalar_t) {}

   /*! Getters */
   size_t GetBatchSize() const { return fBatchSize; }
   size_t GetInputDepth() const { return fInputDepth; }
   size_t GetInputHeight() const { return fInputHeight; }
   size_t GetInputWidth() const { return fInputWidth; }
   size_t GetDepth() const { return fDepth; }
   size_t GetHeight() const { return fHeight; }
   size_t GetWidth() const { return fWidth; }
   bool IsTraining() const { return fIsTraining; }

   const std::vector<Matrix_t> &GetWeights() const { return fWeights; }
   std::vector<Matrix_t> &GetWeights() { return fWeights; }

   const Matrix_t &GetWeightsAt(size_t i) const { return fWeights[i]; }
   Matrix_t &GetWeightsAt(size_t i) { return fWeights[i]; }

   const std::vector<Matrix_t> &GetBiases() const { return fBiases; }
   std::vector<Matrix_t> &GetBiases() { return fBiases; }

   const Matrix_t &GetBiasesAt(size_t i) const { return fBiases[i]; }
   Matrix_t &GetBiasesAt(size_t i) { return fBiases[i]; }

   const std::vector<Matrix_t> &GetWeightGradients() const { return fWeightGradients; }
   std::vector<Matrix_t> &GetWeightGradients() { return fWeightGradients; }

   const Matrix_t &GetWeightGradientsAt(size_t i) const { return fWeightGradients[i]; }
   Matrix_t &GetWeightGradientsAt(size_t i) { return fWeightGradients[i]; }

   const std::vector<Matrix_t> &GetBiasGradients() const { return fBiasGradients; }
   std::vector<Matrix_t> &GetBiasGradients() { return fBiasGradients; }

   const Matrix_t &GetBiasGradientsAt(size_t i) const { return fBiasGradients[i]; }
   Matrix_t &GetBiasGradientsAt(size_t i) { return fBiasGradients[i]; }

   const Tensor_t &GetOutput() const { return fOutput; }
   Tensor_t &GetOutput() { return fOutput; }

   const Tensor_t &GetActivationGradients() const { return fActivationGradients; }
   Tensor_t &GetActivationGradients() { return fActivationGradients; }

   Matrix_t GetOutputAt(size_t i) { return fOutput.At(i).GetMatrix(); }
   const Matrix_t &GetOutputAt(size_t i) const { return fOutput.At(i).GetMatrix(); }

   Matrix_t GetActivationGradientsAt(size_t i) { return fActivationGradients.At(i).GetMatrix(); }
   const Matrix_t &GetActivationGradientsAt(size_t i) const { return fActivationGradients.At(i).GetMatrix(); }

   // Retrieve additional layer parameters that are learned during training but
   // are not weights; an example is the mean and standard deviation of a batch
   // normalization layer.
   virtual std::vector<Matrix_t> GetExtraLayerParameters() const { return std::vector<Matrix_t>(); }
   // Same thing, but to set these extra parameters.
   virtual void SetExtraLayerParameters(const std::vector<Matrix_t> &) {}

   EInitialization GetInitialization() const { return fInit; }

   /*! Setters */
   void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
   void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
   void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
   void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
   void SetDepth(size_t depth) { fDepth = depth; }
   void SetHeight(size_t height) { fHeight = height; }
   void SetWidth(size_t width) { fWidth = width; }
   void SetIsTraining(bool isTraining) { fIsTraining = isTraining; }

   /// helper functions for XML
   void WriteTensorToXML(void *node, const char *name, const std::vector<Matrix_t> &tensor);
   void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix);

   void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix);

};
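
// A minimal sketch (illustrative only) of what a concrete layer derived from
// VGeneralLayer must provide: the pure virtual Forward/Backward/Print methods
// and the XML hooks. The name TMyLayer is hypothetical.
//
//    template <typename Architecture_t>
//    class TMyLayer : public VGeneralLayer<Architecture_t> {
//    public:
//       using Tensor_t = typename Architecture_t::Tensor_t;
//       // ... constructors forwarding the shape arguments to VGeneralLayer ...
//       void Forward(Tensor_t &input, bool applyDropout = false) override;
//       void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) override;
//       void Print() const override;
//       void AddWeightsXMLTo(void *parent) override;
//       void ReadWeightsFromXML(void *parent) override;
//    };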

//
//
//  The General Layer Class - Implementation
//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                             size_t depth, size_t height, size_t width, size_t weightsNSlices,
                                             size_t weightsNRows, size_t weightsNCols, size_t biasesNSlices,
                                             size_t biasesNRows, size_t biasesNCols, size_t outputNSlices,
                                             size_t outputNRows, size_t outputNCols, EInitialization init)
   : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
     fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput(outputNSlices, outputNRows, outputNCols),
     fActivationGradients(outputNSlices, outputNRows, outputNCols),
     fInit(init)
{

   for (size_t i = 0; i < weightsNSlices; i++) {
      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);
   }

   for (size_t i = 0; i < biasesNSlices; i++) {
      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                             size_t depth, size_t height, size_t width, size_t weightsNSlices,
                                             std::vector<size_t> weightsNRows, std::vector<size_t> weightsNCols,
                                             size_t biasesNSlices, std::vector<size_t> biasesNRows,
                                             std::vector<size_t> biasesNCols, size_t outputNSlices, size_t outputNRows,
                                             size_t outputNCols, EInitialization init)
   : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
     fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput(outputNSlices, outputNRows, outputNCols),
     fActivationGradients(outputNSlices, outputNRows, outputNCols),
     fInit(init)
{
   // constructor for weights with different shapes (e.g. in recurrent layers)
   for (size_t i = 0; i < weightsNSlices; i++) {
      fWeights.emplace_back(weightsNRows[i], weightsNCols[i]);
      fWeightGradients.emplace_back(weightsNRows[i], weightsNCols[i]);
   }

   for (size_t i = 0; i < biasesNSlices; i++) {
      fBiases.emplace_back(biasesNRows[i], biasesNCols[i]);
      fBiasGradients.emplace_back(biasesNRows[i], biasesNCols[i]);
   }

   // for (size_t i = 0; i < outputNSlices; i++) {
   //    fOutput.emplace_back(outputNRows, outputNCols);
   //    fActivationGradients.emplace_back(outputNRows, outputNCols);
   // }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(VGeneralLayer<Architecture_t> *layer)
   : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()),
     fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()),
     fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
     fBiasGradients(),
     fOutput(layer->GetOutput().GetShape()), // construct from shape of other tensor
     fActivationGradients(layer->GetActivationGradients().GetShape()),
     fInit(layer->GetInitialization())
{
   // Constructor from another layer pointer of a different architecture
   size_t weightsNSlices = (layer->GetWeights()).size();
   size_t weightsNRows = 0;
   size_t weightsNCols = 0;

   for (size_t i = 0; i < weightsNSlices; i++) {
      weightsNRows = (layer->GetWeightsAt(i)).GetNrows();
      weightsNCols = (layer->GetWeightsAt(i)).GetNcols();

      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);

      Architecture_t::Copy(fWeights[i], layer->GetWeightsAt(i));
   }

   size_t biasesNSlices = (layer->GetBiases()).size();
   size_t biasesNRows = 0;
   size_t biasesNCols = 0;

   for (size_t i = 0; i < biasesNSlices; i++) {
      biasesNRows = (layer->GetBiasesAt(i)).GetNrows();
      biasesNCols = (layer->GetBiasesAt(i)).GetNcols();

      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);

      Architecture_t::Copy(fBiases[i], layer->GetBiasesAt(i));
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::VGeneralLayer(const VGeneralLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight),
     fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth),
     fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
     fOutput(layer.GetOutput()),
     fActivationGradients(layer.GetActivationGradients()),
     fInit(layer.GetInitialization())
{
   // copy constructor
   size_t weightsNSlices = layer.fWeights.size();
   size_t weightsNRows = 0;
   size_t weightsNCols = 0;

   for (size_t i = 0; i < weightsNSlices; i++) {
      weightsNRows = (layer.fWeights[i]).GetNrows();
      weightsNCols = (layer.fWeights[i]).GetNcols();

      fWeights.emplace_back(weightsNRows, weightsNCols);
      fWeightGradients.emplace_back(weightsNRows, weightsNCols);

      Architecture_t::Copy(fWeights[i], layer.fWeights[i]);
   }

   size_t biasesNSlices = layer.fBiases.size();
   size_t biasesNRows = 0;
   size_t biasesNCols = 0;

   for (size_t i = 0; i < biasesNSlices; i++) {
      biasesNRows = (layer.fBiases[i]).GetNrows();
      biasesNCols = (layer.fBiases[i]).GetNcols();

      fBiases.emplace_back(biasesNRows, biasesNCols);
      fBiasGradients.emplace_back(biasesNRows, biasesNCols);

      Architecture_t::Copy(fBiases[i], layer.fBiases[i]);
   }

   size_t outputNSlices = layer.fOutput.size();
   size_t outputNRows = 0;
   size_t outputNCols = 0;

   for (size_t i = 0; i < outputNSlices; i++) {
      outputNRows = (layer.fOutput[i]).GetNrows();
      outputNCols = (layer.fOutput[i]).GetNcols();

      fOutput.emplace_back(outputNRows, outputNCols);
      fActivationGradients.emplace_back(outputNRows, outputNCols);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
VGeneralLayer<Architecture_t>::~VGeneralLayer()
{
   // Nothing to do here.
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::Initialize() -> void
{
   for (size_t i = 0; i < fWeights.size(); i++) {
      initialize<Architecture_t>(fWeights[i], this->GetInitialization());
      initialize<Architecture_t>(fWeightGradients[i], EInitialization::kZero);
   }

   for (size_t i = 0; i < fBiases.size(); i++) {
      initialize<Architecture_t>(fBiases[i], EInitialization::kZero);
      initialize<Architecture_t>(fBiasGradients[i], EInitialization::kZero);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::Update(const Scalar_t learningRate) -> void
{
   this->UpdateWeights(fWeightGradients, learningRate);
   this->UpdateBiases(fBiasGradients, learningRate);
}
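
// The updates below rely on Architecture_t::ScaleAdd(A, B, alpha), which adds
// alpha * B element-wise to A; with alpha = -learningRate this is one step of
// plain gradient descent. Worked example: a weight of 0.50 with gradient 0.20
// and learningRate 0.1 becomes 0.50 + (-0.1 * 0.20) = 0.48.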

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateWeights(const std::vector<Matrix_t> &weightGradients,
                                                  const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fWeights.size(); i++) {
      Architecture_t::ScaleAdd(fWeights[i], weightGradients[i], -learningRate);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateBiases(const std::vector<Matrix_t> &biasGradients,
                                                 const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fBiases.size(); i++) {
      Architecture_t::ScaleAdd(fBiases[i], biasGradients[i], -learningRate);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients,
                                                          const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fWeightGradients.size(); i++) {
      Architecture_t::ScaleAdd(fWeightGradients[i], weightGradients[i], -learningRate);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients,
                                                        const Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fBiasGradients.size(); i++) {
      Architecture_t::ScaleAdd(fBiasGradients[i], biasGradients[i], -learningRate);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::CopyWeights(const std::vector<Matrix_t> &otherWeights) -> void
{
   for (size_t i = 0; i < fWeights.size(); i++) {
      Architecture_t::Copy(fWeights[i], otherWeights[i]);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::CopyBiases(const std::vector<Matrix_t> &otherBiases) -> void
{
   for (size_t i = 0; i < fBiases.size(); i++) {
      Architecture_t::Copy(fBiases[i], otherBiases[i]);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
template <typename Arch>
void VGeneralLayer<Architecture_t>::CopyParameters(const VGeneralLayer<Arch> &layer)
{
   // assert(!std::is_same<Arch, Architecture_t>::value);
   // copy weights from a different architecture - default generic implementation
   Architecture_t::CopyDiffArch(this->GetWeights(), layer.GetWeights());
   Architecture_t::CopyDiffArch(this->GetBiases(), layer.GetBiases());

   // copy also the additional layer parameters
   auto params = layer.GetExtraLayerParameters();
   if (params.size() > 0) {
      auto paramsToCopy = GetExtraLayerParameters();
      Architecture_t::CopyDiffArch(paramsToCopy, params);
      SetExtraLayerParameters(paramsToCopy);
   }
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::WriteTensorToXML(void *node, const char *name, const std::vector<Matrix_t> &tensor) -> void
{
   auto xmlengine = gTools().xmlengine();
   void *matnode = xmlengine.NewChild(node, 0, name);
   if (tensor.size() == 0) return;
   xmlengine.NewAttr(matnode, 0, "Depth", gTools().StringFromInt(tensor.size()));
   // assume same number of rows and columns for every matrix in the std::vector
   xmlengine.NewAttr(matnode, 0, "Rows", gTools().StringFromInt(tensor[0].GetNrows()));
   xmlengine.NewAttr(matnode, 0, "Columns", gTools().StringFromInt(tensor[0].GetNcols()));
   std::stringstream s;
   for (size_t i = 0; i < tensor.size(); ++i) {
      auto &mat = tensor[i];
      for (Int_t row = 0; row < mat.GetNrows(); row++) {
         for (Int_t col = 0; col < mat.GetNcols(); col++) {
            // TString tmp = TString::Format( "%5.15e ", (mat)(row,col) );
            // s << tmp.Data();
            s << std::scientific << mat(row, col) << " ";
         }
      }
   }
   xmlengine.AddRawLine(matnode, s.str().c_str());
}
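
// For illustration (values and node name hypothetical), a single 1x3 matrix
// written with name = "Weights" produces a node of roughly this shape:
//
//    <Weights Depth="1" Rows="1" Columns="3">1.000000e+00 2.000000e+00 3.000000e+00 </Weights>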

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix) -> void
{
   auto xmlengine = gTools().xmlengine();
   void *matnode = xmlengine.NewChild(node, 0, name);

   xmlengine.NewAttr(matnode, 0, "Rows", gTools().StringFromInt(matrix.GetNrows()));
   xmlengine.NewAttr(matnode, 0, "Columns", gTools().StringFromInt(matrix.GetNcols()));
   std::stringstream s;
   s.precision(std::numeric_limits<Scalar_t>::digits10);
   size_t nrows = matrix.GetNrows();
   size_t ncols = matrix.GetNcols();
   for (size_t row = 0; row < nrows; row++) {
      for (size_t col = 0; col < ncols; col++) {
         // TString tmp = TString::Format( "%5.15e ", matrix(row,col) );
         s << std::scientific << matrix(row, col) << " ";
      }
   }

   xmlengine.AddRawLine(matnode, s.str().c_str());
}

//_________________________________________________________________________________________________
template <typename Architecture_t>
auto VGeneralLayer<Architecture_t>::ReadMatrixXML(void *node, const char *name, Matrix_t &matrix) -> void
{
   void *matrixXML = gTools().GetChild(node, name);
   size_t rows, cols;
   gTools().ReadAttr(matrixXML, "Rows", rows);
   gTools().ReadAttr(matrixXML, "Columns", cols);

   R__ASSERT((size_t) matrix.GetNrows() == rows);
   R__ASSERT((size_t) matrix.GetNcols() == cols);

   TMatrixT<Scalar_t> tmatrix(rows, cols);

   const char *matrixString = gTools().xmlengine().GetNodeContent(matrixXML);
   std::stringstream matrixStringStream(matrixString);

   for (size_t i = 0; i < rows; i++)
   {
      for (size_t j = 0; j < cols; j++)
      {
#ifndef R__HAS_TMVAGPU
         matrixStringStream >> tmatrix(i,j);
#else
         Scalar_t value;
         matrixStringStream >> value;
         tmatrix(i,j) = value;
#endif
      }
   }

   // copy from tmatrix to matrix
   Matrix_t tmp(tmatrix);
   Architecture_t::Copy(matrix, tmp);
}


template <typename Architecture>
auto debugTensor(const typename Architecture::Tensor_t &A, const std::string name = "tensor") -> void
{
   Architecture::PrintTensor(A, name);
}
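
// Example use (illustrative), from code that knows the architecture type:
//
//    debugTensor<Architecture_t>(layer.GetOutput(), "layer output");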

} // namespace DNN
} // namespace TMVA

#endif