Logo ROOT  
Reference Guide
GeneralLayer.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TGeneralLayer *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * General Deep Neural Network Layer *
12 * *
13 * Authors (alphabetical): *
14 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15 * *
16 * Copyright (c) 2005-2015: *
17 * CERN, Switzerland *
18 * U. of Victoria, Canada *
19 * MPI-K Heidelberg, Germany *
20 * U. of Bonn, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27#ifndef TMVA_DNN_GENERALLAYER
28#define TMVA_DNN_GENERALLAYER
29
#include <iostream>
#include <limits>
#include <sstream> // std::stringstream used by the XML helpers

// for xml
#include "TMVA/Tools.h"
#include "TError.h" // for R__ASSERT

#include "TMVA/DNN/Functions.h"
38
39namespace TMVA {
40namespace DNN {
41
42/** \class VGeneralLayer
43 Generic General Layer class.
44
45 This class represents the general class for all layers in the Deep Learning
46 Module.
47 */
48template <typename Architecture_t>
50
51 using Tensor_t = typename Architecture_t::Tensor_t;
52 using Matrix_t = typename Architecture_t::Matrix_t;
53 using Scalar_t = typename Architecture_t::Scalar_t;
54
55
56protected:
57 size_t fBatchSize; ///< Batch size used for training and evaluation
58
59 size_t fInputDepth; ///< The depth of the previous layer or input.
60 size_t fInputHeight; ///< The height of the previous layer or input.
61 size_t fInputWidth; ///< The width of the previous layer or input.
62
63 size_t fDepth; ///< The depth of the layer.
64 size_t fHeight; ///< The height of the layer.
65 size_t fWidth; ///< The width of this layer.
66
67 bool fIsTraining; ///< Flag indicating the mode
68
69 std::vector<Matrix_t> fWeights; ///< The weights associated to the layer.
70 std::vector<Matrix_t> fBiases; ///< The biases associated to the layer.
71
72 std::vector<Matrix_t> fWeightGradients; ///< Gradients w.r.t. the weights of the layer.
73 std::vector<Matrix_t> fBiasGradients; ///< Gradients w.r.t. the bias values of the layer.
74
75 Tensor_t fOutput; ///< Activations of this layer.
76 Tensor_t fActivationGradients; ///< Gradients w.r.t. the activations of this layer.
77
78 EInitialization fInit; ///< The initialization method.
79
80public:
81 /*! Constructor */
82 VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
83 size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols,
84 size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows,
85 size_t OutputNCols, EInitialization Init);
86
87 /*! General Constructor with different weights dimension */
88 VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
89 size_t Height, size_t Width, size_t WeightsNSlices, std::vector<size_t> WeightsNRows,
90 std::vector<size_t> WeightsNCols, size_t BiasesNSlices, std::vector<size_t> BiasesNRows,
91 std::vector<size_t> BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols,
93
94 /*! Copy the layer provided as a pointer */
96
97 /*! Copy Constructor */
99
100 /*! Virtual Destructor. */
101 virtual ~VGeneralLayer();
102
103 /*! Initialize the weights and biases according to the given initialization method. */
104 virtual void Initialize();
105
106 /*! Computes activation of the layer for the given input. The input
107 * must be in 3D tensor form with the different matrices corresponding to
108 * different events in the batch. */
109 virtual void Forward(Tensor_t &input, bool applyDropout = false) = 0;
110
111 /*! Backpropagates the error. Must only be called directly at the corresponding
112 * call to Forward(...). */
113 virtual void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward ) = 0;
114 ///// std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2) = 0;
115
116 /*! Reset some training flags after a loop on all batches
117 Some layer (e.g. batchnormalization) might need to implement the function in case some operations
118 are needed after looping an all batches */
119 virtual void ResetTraining() {}
120
121 /*! Updates the weights and biases, given the learning rate */
122 void Update(const Scalar_t learningRate);
123
124 /*! Updates the weights, given the gradients and the learning rate, */
125 void UpdateWeights(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);
126
127 /*! Updates the biases, given the gradients and the learning rate. */
128 void UpdateBiases(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);
129
130 /*! Updates the weight gradients, given some other weight gradients and learning rate. */
131 void UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);
132
133 /*! Updates the bias gradients, given some other weight gradients and learning rate. */
134 void UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);
135
136 /*! Copies the weights provided as an input. */
137 void CopyWeights(const std::vector<Matrix_t> &otherWeights);
138
139 /*! Copies the biases provided as an input. */
140 void CopyBiases(const std::vector<Matrix_t> &otherBiases);
141
142 /*! Copy all trainable weight and biases from another equivalent layer but with different architecture
143 The function can copy also extra parameters in addition to weights and biases if they are return
144 by the function GetExtraLayerParameters */
145 template <typename Arch>
146 void CopyParameters(const VGeneralLayer<Arch> &layer);
147
148 /*! Prints the info about the layer. */
149 virtual void Print() const = 0;
150
151 /*! Writes the information and the weights about the layer in an XML node. */
152 virtual void AddWeightsXMLTo(void *parent) = 0;
153
154 /*! Read the information and the weights about the layer from XML node. */
155 virtual void ReadWeightsFromXML(void *parent) = 0;
156
157 /*! Set Dropout probability. Reimplemented for layesrs supporting droput */
159
160 /*! Getters */
161 size_t GetBatchSize() const { return fBatchSize; }
162 size_t GetInputDepth() const { return fInputDepth; }
163 size_t GetInputHeight() const { return fInputHeight; }
164 size_t GetInputWidth() const { return fInputWidth; }
165 size_t GetDepth() const { return fDepth; }
166 size_t GetHeight() const { return fHeight; }
167 size_t GetWidth() const { return fWidth; }
168 bool IsTraining() const { return fIsTraining; }
169
170 const std::vector<Matrix_t> &GetWeights() const { return fWeights; }
171 std::vector<Matrix_t> &GetWeights() { return fWeights; }
172
173 const Matrix_t &GetWeightsAt(size_t i) const { return fWeights[i]; }
174 Matrix_t &GetWeightsAt(size_t i) { return fWeights[i]; }
175
176 const std::vector<Matrix_t> &GetBiases() const { return fBiases; }
177 std::vector<Matrix_t> &GetBiases() { return fBiases; }
178
179 const Matrix_t &GetBiasesAt(size_t i) const { return fBiases[i]; }
180 Matrix_t &GetBiasesAt(size_t i) { return fBiases[i]; }
181
182 const std::vector<Matrix_t> &GetWeightGradients() const { return fWeightGradients; }
183 std::vector<Matrix_t> &GetWeightGradients() { return fWeightGradients; }
184
185 const Matrix_t &GetWeightGradientsAt(size_t i) const { return fWeightGradients[i]; }
187
188 const std::vector<Matrix_t> &GetBiasGradients() const { return fBiasGradients; }
189 std::vector<Matrix_t> &GetBiasGradients() { return fBiasGradients; }
190
191 const Matrix_t &GetBiasGradientsAt(size_t i) const { return fBiasGradients[i]; }
193
194 const Tensor_t &GetOutput() const { return fOutput; }
196
199
200 Matrix_t GetOutputAt(size_t i) { return fOutput.At(i).GetMatrix(); }
201 const Matrix_t &GetOutputAt(size_t i) const { return fOutput.At(i).GetMatrix(); }
202
203 Matrix_t GetActivationGradientsAt(size_t i) { return fActivationGradients.At(i).GetMatrix(); }
204 const Matrix_t &GetActivationGradientsAt(size_t i) const { return fActivationGradients.At(i).GetMatrix(); }
205
206 // function to retrieve additional layer parameters which are learned during training but they are not weights
207 // an example are the mean and std of batch normalization layer
208 virtual std::vector<Matrix_t> GetExtraLayerParameters() const { return std::vector<Matrix_t>(); }
209 // same thing but to set these extra parameters
210 virtual void SetExtraLayerParameters(const std::vector<Matrix_t> & ) {}
211
213
214 /*! Setters */
215 void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
216 void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
217 void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
218 void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
219 void SetDepth(size_t depth) { fDepth = depth; }
220 void SetHeight(size_t height) { fHeight = height; }
221 void SetWidth(size_t width) { fWidth = width; }
222 void SetIsTraining(bool isTraining) { fIsTraining = isTraining; }
223
224 /// helper functions for XML
225 void WriteTensorToXML( void * node, const char * name, const std::vector<Matrix_t> & tensor);
226 void WriteMatrixToXML( void * node, const char * name, const Matrix_t & matrix);
227
228 void ReadMatrixXML( void * node, const char * name, Matrix_t & matrix);
229
230};
231
232//
233//
234// The General Layer Class - Implementation
235//_________________________________________________________________________________________________
236template <typename Architecture_t>
237VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
238 size_t depth, size_t height, size_t width, size_t weightsNSlices,
239 size_t weightsNRows, size_t weightsNCols, size_t biasesNSlices,
240 size_t biasesNRows, size_t biasesNCols, size_t outputNSlices,
241 size_t outputNRows, size_t outputNCols, EInitialization init)
242 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
243 fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
244 fOutput( outputNSlices, outputNRows, outputNCols ),
245 fActivationGradients( outputNSlices, outputNRows, outputNCols ),
246 fInit(init)
247{
248
249 for (size_t i = 0; i < weightsNSlices; i++) {
250 fWeights.emplace_back(weightsNRows, weightsNCols);
251 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
252 }
253
254 for (size_t i = 0; i < biasesNSlices; i++) {
255 fBiases.emplace_back(biasesNRows, biasesNCols);
256 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
257 }
258}
259
260//_________________________________________________________________________________________________
261template <typename Architecture_t>
262VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
263 size_t depth, size_t height, size_t width, size_t weightsNSlices,
264 std::vector<size_t> weightsNRows, std::vector<size_t> weightsNCols,
265 size_t biasesNSlices, std::vector<size_t> biasesNRows,
266 std::vector<size_t> biasesNCols, size_t outputNSlices, size_t outputNRows,
267 size_t outputNCols, EInitialization init)
268 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
269 fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
270 fOutput( outputNSlices, outputNRows, outputNCols ),
271 fActivationGradients( outputNSlices, outputNRows, outputNCols ),
272 fInit(init)
273{
274 // add constructor for weights with different shapes (e.g. in recurrent layers)
275 for (size_t i = 0; i < weightsNSlices; i++) {
276 fWeights.emplace_back(weightsNRows[i], weightsNCols[i]);
277 fWeightGradients.emplace_back(weightsNRows[i], weightsNCols[i]);
278 }
279
280 for (size_t i = 0; i < biasesNSlices; i++) {
281 fBiases.emplace_back(biasesNRows[i], biasesNCols[i]);
282 fBiasGradients.emplace_back(biasesNRows[i], biasesNCols[i]);
283 }
284
285 // for (size_t i = 0; i < outputNSlices; i++) {
286 // fOutput.emplace_back(outputNRows, outputNCols);
287 // fActivationGradients.emplace_back(outputNRows, outputNCols);
288 // }
289}
290
291//_________________________________________________________________________________________________
292template <typename Architecture_t>
294 : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()),
295 fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()),
296 fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
297 fBiasGradients(),
298 fOutput( layer->GetOutput().GetShape() ), // construct from shape of other tensor
299 fActivationGradients( layer->GetActivationGradients().GetShape() ),
300 fInit(layer->GetInitialization() )
301{
302 // Constructor from another layer pointer of a different architecture
303 size_t weightsNSlices = (layer->GetWeights()).size();
304 size_t weightsNRows = 0;
305 size_t weightsNCols = 0;
306
307 for (size_t i = 0; i < weightsNSlices; i++) {
308 weightsNRows = (layer->GetWeightsAt(i)).GetNrows();
309 weightsNCols = (layer->GetWeightsAt(i)).GetNcols();
310
311 fWeights.emplace_back(weightsNRows, weightsNCols);
312 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
313
315 }
316
317 size_t biasesNSlices = (layer->GetBiases()).size();
318 size_t biasesNRows = 0;
319 size_t biasesNCols = 0;
320
321 for (size_t i = 0; i < biasesNSlices; i++) {
322 biasesNRows = (layer->GetBiasesAt(i)).GetNrows();
323 biasesNCols = (layer->GetBiasesAt(i)).GetNcols();
324
325 fBiases.emplace_back(biasesNRows, biasesNCols);
326 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
327
329 }
330}
331
332//_________________________________________________________________________________________________
333template <typename Architecture_t>
335 : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight),
336 fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth),
337 fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
338 fOutput( layer.GetOutput() ),
339 fActivationGradients( layer.GetActivationGradients() ),
340 fInit( layer.GetInitialization())
341{
342 // copy constructor
343 size_t weightsNSlices = layer.fWeights.size();
344 size_t weightsNRows = 0;
345 size_t weightsNCols = 0;
346
347 for (size_t i = 0; i < weightsNSlices; i++) {
348 weightsNRows = (layer.fWeights[i]).GetNrows();
349 weightsNCols = (layer.fWeights[i]).GetNcols();
350
351 fWeights.emplace_back(weightsNRows, weightsNCols);
352 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
353
355 }
356
357 size_t biasesNSlices = layer.fBiases.size();
358 size_t biasesNRows = 0;
359 size_t biasesNCols = 0;
360
361 for (size_t i = 0; i < biasesNSlices; i++) {
362 biasesNRows = (layer.fBiases[i]).GetNrows();
363 biasesNCols = (layer.fBiases[i]).GetNcols();
364
365 fBiases.emplace_back(biasesNRows, biasesNCols);
366 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
367
369 }
370
371 size_t outputNSlices = layer.fOutput.size();
372 size_t outputNRows = 0;
373 size_t outputNCols = 0;
374
375 for (size_t i = 0; i < outputNSlices; i++) {
376 outputNRows = (layer.fOutput[i]).GetNrows();
377 outputNCols = (layer.fOutput[i]).GetNcols();
378
379 fOutput.emplace_back(outputNRows, outputNCols);
380 fActivationGradients.emplace_back(outputNRows, outputNCols);
381 }
382}
383
384//_________________________________________________________________________________________________
385template <typename Architecture_t>
387{
388 // Nothing to do here.
389}
390
391//_________________________________________________________________________________________________
392template <typename Architecture_t>
394{
395 for (size_t i = 0; i < fWeights.size(); i++) {
396 initialize<Architecture_t>(fWeights[i], this->GetInitialization());
397 initialize<Architecture_t>(fWeightGradients[i], EInitialization::kZero);
398 }
399
400 for (size_t i = 0; i < fBiases.size(); i++) {
401 initialize<Architecture_t>(fBiases[i], EInitialization::kZero);
402 initialize<Architecture_t>(fBiasGradients[i], EInitialization::kZero);
403 }
404}
405
406//_________________________________________________________________________________________________
407template <typename Architecture_t>
408auto VGeneralLayer<Architecture_t>::Update(const Scalar_t learningRate) -> void
409{
410 this->UpdateWeights(fWeightGradients, learningRate);
411 this->UpdateBiases(fBiasGradients, learningRate);
412}
413
414//_________________________________________________________________________________________________
415template <typename Architecture_t>
416auto VGeneralLayer<Architecture_t>::UpdateWeights(const std::vector<Matrix_t> &weightGradients,
417 const Scalar_t learningRate) -> void
418{
419 for (size_t i = 0; i < fWeights.size(); i++) {
420 Architecture_t::ScaleAdd(fWeights[i], weightGradients[i], -learningRate);
421 }
422}
423
424//_________________________________________________________________________________________________
425template <typename Architecture_t>
426auto VGeneralLayer<Architecture_t>::UpdateBiases(const std::vector<Matrix_t> &biasGradients,
427 const Scalar_t learningRate) -> void
428{
429 for (size_t i = 0; i < fBiases.size(); i++) {
430 Architecture_t::ScaleAdd(fBiases[i], biasGradients[i], -learningRate);
431 }
432}
433
434//_________________________________________________________________________________________________
435template <typename Architecture_t>
436auto VGeneralLayer<Architecture_t>::UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients,
437 const Scalar_t learningRate) -> void
438{
439 for (size_t i = 0; i < fWeightGradients.size(); i++) {
440 Architecture_t::ScaleAdd(fWeightGradients[i], weightGradients[i], -learningRate);
441 }
442}
443
444//_________________________________________________________________________________________________
445template <typename Architecture_t>
446auto VGeneralLayer<Architecture_t>::UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients,
447 const Scalar_t learningRate) -> void
448{
449 for (size_t i = 0; i < fBiasGradients.size(); i++) {
450 Architecture_t::ScaleAdd(fBiasGradients[i], biasGradients[i], -learningRate);
451 }
452}
453
454//_________________________________________________________________________________________________
455template <typename Architecture_t>
456auto VGeneralLayer<Architecture_t>::CopyWeights(const std::vector<Matrix_t> &otherWeights) -> void
457{
458
459 for (size_t i = 0; i < fWeights.size(); i++) {
460 Architecture_t::Copy(fWeights[i], otherWeights[i]);
461 }
462}
463
464//_________________________________________________________________________________________________
465template <typename Architecture_t>
466auto VGeneralLayer<Architecture_t>::CopyBiases(const std::vector<Matrix_t> &otherBiases) -> void
467{
468 for (size_t i = 0; i < fBiases.size(); i++) {
469 Architecture_t::Copy(fBiases[i], otherBiases[i]);
470 }
471}
472
473//_________________________________________________________________________________________________
474template <typename Architecture_t>
475template <typename Arch>
477{
478 //assert(!std::is_same<Arch, Architecture_t>::value);
479 // copy weights from a different arhcitecture- default generic implementation
480 Architecture_t::CopyDiffArch(this->GetWeights(), layer.GetWeights());
481 Architecture_t::CopyDiffArch(this->GetBiases(), layer.GetBiases());
482
483 // copy also the additional layer parameters
484 auto params = layer.GetExtraLayerParameters();
485 if (params.size() > 0) {
486 auto paramsToCopy = GetExtraLayerParameters();
487 Architecture_t::CopyDiffArch(paramsToCopy, params );
488 SetExtraLayerParameters(paramsToCopy);
489 }
490}
491
492//_________________________________________________________________________________________________
493template <typename Architecture_t>
494auto VGeneralLayer<Architecture_t>::WriteTensorToXML(void * node, const char * name, const std::vector<Matrix_t> & tensor) -> void
495{
496 auto xmlengine = gTools().xmlengine();
497 void* matnode = xmlengine.NewChild(node, 0, name);
498 if (tensor.size() == 0) return;
499 xmlengine.NewAttr(matnode,0,"Depth", gTools().StringFromInt(tensor.size()) );
500 // assume same number of rows and columns for every matrix in std::vector
501 xmlengine.NewAttr(matnode,0,"Rows", gTools().StringFromInt(tensor[0].GetNrows()) );
502 xmlengine.NewAttr(matnode,0,"Columns", gTools().StringFromInt(tensor[0].GetNcols()) );
503 std::stringstream s;
504 for (size_t i = 0; i < tensor.size(); ++i) {
505 auto & mat = tensor[i];
506 for (Int_t row = 0; row < mat.GetNrows(); row++) {
507 for (Int_t col = 0; col < mat.GetNcols(); col++) {
508 // TString tmp = TString::Format( "%5.15e ", (mat)(row,col) );
509 // s << tmp.Data();
510 s << std::scientific << mat(row, col) << " ";
511 }
512 }
513 }
514 xmlengine.AddRawLine( matnode, s.str().c_str() );
515}
516
517//_________________________________________________________________________________________________
518template <typename Architecture_t>
519auto VGeneralLayer<Architecture_t>::WriteMatrixToXML(void * node, const char * name, const Matrix_t & matrix) -> void
520{
521 auto xmlengine = gTools().xmlengine();
522 void* matnode = xmlengine.NewChild(node, 0, name);
523
524 xmlengine.NewAttr(matnode,0,"Rows", gTools().StringFromInt(matrix.GetNrows()) );
525 xmlengine.NewAttr(matnode,0,"Columns", gTools().StringFromInt(matrix.GetNcols()) );
526 std::stringstream s;
527 s.precision( std::numeric_limits<Scalar_t>::digits10 );
528 size_t nrows = matrix.GetNrows();
529 size_t ncols = matrix.GetNcols();
530 for (size_t row = 0; row < nrows; row++) {
531 for (size_t col = 0; col < ncols; col++) {
532 //TString tmp = TString::Format( "%5.15e ", matrix(row,col) );
533 s << std::scientific << matrix(row,col) << " ";
534 }
535 }
536
537 xmlengine.AddRawLine( matnode, s.str().c_str() );
538}
539
540//_________________________________________________________________________________________________
541template <typename Architecture_t>
542auto VGeneralLayer<Architecture_t>::ReadMatrixXML(void * node, const char * name, Matrix_t & matrix) -> void
543{
544 void *matrixXML = gTools().GetChild(node, name);
545 size_t rows, cols;
546 gTools().ReadAttr(matrixXML, "Rows", rows);
547 gTools().ReadAttr(matrixXML, "Columns", cols);
548
549 R__ASSERT((size_t) matrix.GetNrows() == rows);
550 R__ASSERT((size_t) matrix.GetNcols() == cols);
551
552 TMatrixT<Scalar_t> tmatrix(rows, cols);
553
554 const char * matrixString = gTools().xmlengine().GetNodeContent(matrixXML);
555 std::stringstream matrixStringStream(matrixString);
556
557 for (size_t i = 0; i < rows; i++)
558 {
559 for (size_t j = 0; j < cols; j++)
560 {
561#ifndef R__HAS_TMVAGPU
562 matrixStringStream >> tmatrix(i,j);
563#else
564 Scalar_t value;
565 matrixStringStream >> value;
566 tmatrix(i,j) = value;
567#endif
568
569 }
570 }
571
572 // copy from tmatrix to matrix
573 Matrix_t tmp( tmatrix);
574 Architecture_t::Copy(matrix, tmp);
575
576}
577
578
/*! Debug helper: print a tensor with an optional label via the architecture's
    PrintTensor. The name is taken by const reference to avoid a copy. */
template <typename Architecture>
auto debugTensor(const typename Architecture::Tensor_t & A, const std::string & name = "tensor") -> void
{
   Architecture::PrintTensor(A,name);
}
584
585} // namespace DNN
586} // namespace TMVA
587
588#endif
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
char name[80]
Definition: TGX11.cxx:109
Generic General Layer class.
Definition: GeneralLayer.h:49
std::vector< Matrix_t > fWeightGradients
Gradients w.r.t. the weights of the layer.
Definition: GeneralLayer.h:72
Tensor_t fOutput
Activations of this layer.
Definition: GeneralLayer.h:75
const std::vector< Matrix_t > & GetWeightGradients() const
Definition: GeneralLayer.h:182
virtual void SetDropoutProbability(Scalar_t)
Set Dropout probability.
Definition: GeneralLayer.h:158
void CopyParameters(const VGeneralLayer< Arch > &layer)
Copy all trainable weight and biases from another equivalent layer but with different architecture Th...
Definition: GeneralLayer.h:476
const Matrix_t & GetWeightsAt(size_t i) const
Definition: GeneralLayer.h:173
void SetHeight(size_t height)
Definition: GeneralLayer.h:220
void UpdateWeightGradients(const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
Updates the weight gradients, given some other weight gradients and learning rate.
Definition: GeneralLayer.h:436
virtual void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:393
Matrix_t & GetBiasesAt(size_t i)
Definition: GeneralLayer.h:180
void SetInputHeight(size_t inputHeight)
Definition: GeneralLayer.h:217
std::vector< Matrix_t > fBiasGradients
Gradients w.r.t. the bias values of the layer.
Definition: GeneralLayer.h:73
void SetDepth(size_t depth)
Definition: GeneralLayer.h:219
virtual void SetExtraLayerParameters(const std::vector< Matrix_t > &)
Definition: GeneralLayer.h:210
virtual void ReadWeightsFromXML(void *parent)=0
Read the information and the weights about the layer from XML node.
void UpdateBiasGradients(const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
Updates the bias gradients, given some other weight gradients and learning rate.
Definition: GeneralLayer.h:446
void SetBatchSize(size_t batchSize)
Setters.
Definition: GeneralLayer.h:215
void CopyWeights(const std::vector< Matrix_t > &otherWeights)
Copies the weights provided as an input.
Definition: GeneralLayer.h:456
size_t fBatchSize
Batch size used for training and evaluation.
Definition: GeneralLayer.h:57
virtual void AddWeightsXMLTo(void *parent)=0
Writes the information and the weights about the layer in an XML node.
void UpdateWeights(const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
Updates the weights, given the gradients and the learning rate,.
Definition: GeneralLayer.h:416
typename Architecture_t::Matrix_t Matrix_t
Definition: GeneralLayer.h:52
const std::vector< Matrix_t > & GetBiasGradients() const
Definition: GeneralLayer.h:188
void SetInputDepth(size_t inputDepth)
Definition: GeneralLayer.h:216
const std::vector< Matrix_t > & GetWeights() const
Definition: GeneralLayer.h:170
size_t GetDepth() const
Definition: GeneralLayer.h:165
std::vector< Matrix_t > & GetWeights()
Definition: GeneralLayer.h:171
size_t fWidth
The width of this layer.
Definition: GeneralLayer.h:65
EInitialization fInit
The initialization method.
Definition: GeneralLayer.h:78
std::vector< Matrix_t > fBiases
The biases associated to the layer.
Definition: GeneralLayer.h:70
void SetIsTraining(bool isTraining)
Definition: GeneralLayer.h:222
size_t fInputWidth
The width of the previous layer or input.
Definition: GeneralLayer.h:61
size_t fHeight
The height of the layer.
Definition: GeneralLayer.h:64
virtual void Print() const =0
Prints the info about the layer.
size_t fInputDepth
The depth of the previous layer or input.
Definition: GeneralLayer.h:59
void SetWidth(size_t width)
Definition: GeneralLayer.h:221
bool fIsTraining
Flag indicating the mode.
Definition: GeneralLayer.h:67
const Tensor_t & GetOutput() const
Definition: GeneralLayer.h:194
const std::vector< Matrix_t > & GetBiases() const
Definition: GeneralLayer.h:176
typename Architecture_t::Scalar_t Scalar_t
Definition: GeneralLayer.h:53
std::vector< Matrix_t > & GetBiasGradients()
Definition: GeneralLayer.h:189
Tensor_t & GetActivationGradients()
Definition: GeneralLayer.h:198
std::vector< Matrix_t > fWeights
The weights associated to the layer.
Definition: GeneralLayer.h:69
EInitialization GetInitialization() const
Definition: GeneralLayer.h:212
Tensor_t fActivationGradients
Gradients w.r.t. the activations of this layer.
Definition: GeneralLayer.h:76
Matrix_t & GetWeightsAt(size_t i)
Definition: GeneralLayer.h:174
Matrix_t & GetBiasGradientsAt(size_t i)
Definition: GeneralLayer.h:192
size_t GetInputDepth() const
Definition: GeneralLayer.h:162
const Matrix_t & GetActivationGradientsAt(size_t i) const
Definition: GeneralLayer.h:204
std::vector< Matrix_t > & GetBiases()
Definition: GeneralLayer.h:177
virtual std::vector< Matrix_t > GetExtraLayerParameters() const
Definition: GeneralLayer.h:208
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
Definition: GeneralLayer.h:519
Matrix_t GetActivationGradientsAt(size_t i)
Definition: GeneralLayer.h:203
std::vector< Matrix_t > & GetWeightGradients()
Definition: GeneralLayer.h:183
const Tensor_t & GetActivationGradients() const
Definition: GeneralLayer.h:197
size_t fInputHeight
The height of the previous layer or input.
Definition: GeneralLayer.h:60
size_t fDepth
The depth of the layer.
Definition: GeneralLayer.h:63
virtual void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward)=0
Backpropagates the error.
void CopyBiases(const std::vector< Matrix_t > &otherBiases)
Copies the biases provided as an input.
Definition: GeneralLayer.h:466
void Update(const Scalar_t learningRate)
Updates the weights and biases, given the learning rate.
Definition: GeneralLayer.h:408
const Matrix_t & GetBiasesAt(size_t i) const
Definition: GeneralLayer.h:179
virtual void ResetTraining()
Reset some training flags after a loop on all batches Some layer (e.g.
Definition: GeneralLayer.h:119
size_t GetInputHeight() const
Definition: GeneralLayer.h:163
void SetInputWidth(size_t inputWidth)
Definition: GeneralLayer.h:218
const Matrix_t & GetBiasGradientsAt(size_t i) const
Definition: GeneralLayer.h:191
void WriteTensorToXML(void *node, const char *name, const std::vector< Matrix_t > &tensor)
helper functions for XML
Definition: GeneralLayer.h:494
size_t GetBatchSize() const
Getters.
Definition: GeneralLayer.h:161
Matrix_t & GetWeightGradientsAt(size_t i)
Definition: GeneralLayer.h:186
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
Definition: GeneralLayer.h:542
virtual void Forward(Tensor_t &input, bool applyDropout=false)=0
Computes activation of the layer for the given input.
Matrix_t GetOutputAt(size_t i)
Definition: GeneralLayer.h:200
size_t GetWidth() const
Definition: GeneralLayer.h:167
size_t GetHeight() const
Definition: GeneralLayer.h:166
const Matrix_t & GetWeightGradientsAt(size_t i) const
Definition: GeneralLayer.h:185
void UpdateBiases(const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
Updates the biases, given the gradients and the learning rate.
Definition: GeneralLayer.h:426
typename Architecture_t::Tensor_t Tensor_t
Definition: GeneralLayer.h:51
virtual ~VGeneralLayer()
Virtual Destructor.
Definition: GeneralLayer.h:386
const Matrix_t & GetOutputAt(size_t i) const
Definition: GeneralLayer.h:201
VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init)
Constructor.
Definition: GeneralLayer.h:237
size_t GetInputWidth() const
Definition: GeneralLayer.h:164
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1161
TXMLEngine & xmlengine()
Definition: Tools.h:268
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
TMatrixT.
Definition: TMatrixT.h:39
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=nullptr)
create new child element for parent node
Definition: TXMLEngine.cxx:709
const char * GetNodeContent(XMLNodePointer_t xmlnode)
get contents (if any) of xmlnode
EvaluateInfo init(std::vector< RooRealProxy > parameters, std::vector< ArrayWrapper * > wrappers, std::vector< double * > arrays, size_t begin, size_t batchSize)
static double A[]
void Copy(void *source, void *dest)
void Init(TClassEdit::TInterpreterLookupHelper *helper)
Definition: TClassEdit.cxx:154
static constexpr double s
EInitialization
Definition: Functions.h:72
auto debugTensor(const typename Architecture::Tensor_t &A, const std::string name="tensor") -> void
Definition: GeneralLayer.h:580
UInt_t Depth(const Node< T > *node)
Definition: NodekNN.h:213
create variable transformations
Tools & gTools()