Logo ROOT   6.16/01
Reference Guide
GeneralLayer.h
Go to the documentation of this file.
1// @(#)root/tmva/tmva/dnn:$Id$
2// Author: Vladimir Ilievski
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : TGeneralLayer *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * General Deep Neural Network Layer *
12 * *
13 * Authors (alphabetical): *
14 * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15 * *
16 * Copyright (c) 2005-2015: *
17 * CERN, Switzerland *
18 * U. of Victoria, Canada *
19 * MPI-K Heidelberg, Germany *
20 * U. of Bonn, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27#ifndef TMVA_DNN_GENERALLAYER
28#define TMVA_DNN_GENERALLAYER
29
30#include <iostream>
31#include <limits>
32
33// for xml
34#include "TMVA/Tools.h"
35
36namespace TMVA {
37namespace DNN {
38
39/** \class VGeneralLayer
40 Generic General Layer class.
41
42 This class represents the general class for all layers in the Deep Learning
43 Module.
44 */
45template <typename Architecture_t>
47 using Matrix_t = typename Architecture_t::Matrix_t;
48 using Scalar_t = typename Architecture_t::Scalar_t;
49
50protected:
51 size_t fBatchSize; ///< Batch size used for training and evaluation
52
53 size_t fInputDepth; ///< The depth of the previous layer or input.
54 size_t fInputHeight; ///< The height of the previous layer or input.
55 size_t fInputWidth; ///< The width of the previous layer or input.
56
57 size_t fDepth; ///< The depth of the layer.
58 size_t fHeight; ///< The height of the layer.
59 size_t fWidth; ///< The width of this layer.
60
61 bool fIsTraining; ///< Flag indicatig the mode
62
63 std::vector<Matrix_t> fWeights; ///< The weights associated to the layer.
64 std::vector<Matrix_t> fBiases; ///< The biases associated to the layer.
65
66 std::vector<Matrix_t> fWeightGradients; ///< Gradients w.r.t. the weights of the layer.
67 std::vector<Matrix_t> fBiasGradients; ///< Gradients w.r.t. the bias values of the layer.
68
69 std::vector<Matrix_t> fOutput; ///< Activations of this layer.
70 std::vector<Matrix_t> fActivationGradients; ///< Gradients w.r.t. the activations of this layer.
71
72 EInitialization fInit; ///< The initialization method.
73
74public:
75 /*! Constructor */
76 VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
77 size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols,
78 size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows,
79 size_t OutputNCols, EInitialization Init);
80
81 /*! General Constructor with different weights dimension */
82 VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth,
83 size_t Height, size_t Width, size_t WeightsNSlices, std::vector<size_t> WeightsNRows,
84 std::vector<size_t> WeightsNCols, size_t BiasesNSlices, std::vector<size_t> BiasesNRows,
85 std::vector<size_t> BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols,
87
88 /*! Copy the layer provided as a pointer */
90
91 /*! Copy Constructor */
93
94 /*! Virtual Destructor. */
95 virtual ~VGeneralLayer();
96
97 /*! Initialize the weights and biases according to the given initialization method. */
98 void Initialize();
99
100 /*! Computes activation of the layer for the given input. The input
101 * must be in 3D tensor form with the different matrices corresponding to
102 * different events in the batch. */
103 virtual void Forward(std::vector<Matrix_t> &input, bool applyDropout = false) = 0;
104
105 /*! Backpropagates the error. Must only be called directly at the corresponding
106 * call to Forward(...). */
107 virtual void Backward(std::vector<Matrix_t> &gradients_backward, const std::vector<Matrix_t> &activations_backward,
108 std::vector<Matrix_t> &inp1, std::vector<Matrix_t> &inp2) = 0;
109
110 /*! Updates the weights and biases, given the learning rate */
111 void Update(const Scalar_t learningRate);
112
113 /*! Updates the weights, given the gradients and the learning rate, */
114 void UpdateWeights(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);
115
116 /*! Updates the biases, given the gradients and the learning rate. */
117 void UpdateBiases(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);
118
119 /*! Updates the weight gradients, given some other weight gradients and learning rate. */
120 void UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients, const Scalar_t learningRate);
121
122 /*! Updates the bias gradients, given some other weight gradients and learning rate. */
123 void UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients, const Scalar_t learningRate);
124
125 /*! Copies the weights provided as an input. */
126 void CopyWeights(const std::vector<Matrix_t> &otherWeights);
127
128 /*! Copies the biases provided as an input. */
129 void CopyBiases(const std::vector<Matrix_t> &otherBiases);
130
131 /*! Prints the info about the layer. */
132 virtual void Print() const = 0;
133
134 /*! Writes the information and the weights about the layer in an XML node. */
135 virtual void AddWeightsXMLTo(void *parent) = 0;
136
137 /*! Read the information and the weights about the layer from XML node. */
138 virtual void ReadWeightsFromXML(void *parent) = 0;
139
140 /*! Getters */
141 size_t GetBatchSize() const { return fBatchSize; }
142 size_t GetInputDepth() const { return fInputDepth; }
143 size_t GetInputHeight() const { return fInputHeight; }
144 size_t GetInputWidth() const { return fInputWidth; }
145 size_t GetDepth() const { return fDepth; }
146 size_t GetHeight() const { return fHeight; }
147 size_t GetWidth() const { return fWidth; }
148 bool IsTraining() const { return fIsTraining; }
149
150 const std::vector<Matrix_t> &GetWeights() const { return fWeights; }
151 std::vector<Matrix_t> &GetWeights() { return fWeights; }
152
153 const Matrix_t &GetWeightsAt(size_t i) const { return fWeights[i]; }
154 Matrix_t &GetWeightsAt(size_t i) { return fWeights[i]; }
155
156 const std::vector<Matrix_t> &GetBiases() const { return fBiases; }
157 std::vector<Matrix_t> &GetBiases() { return fBiases; }
158
159 const Matrix_t &GetBiasesAt(size_t i) const { return fBiases[i]; }
160 Matrix_t &GetBiasesAt(size_t i) { return fBiases[i]; }
161
162 const std::vector<Matrix_t> &GetWeightGradients() const { return fWeightGradients; }
163 std::vector<Matrix_t> &GetWeightGradients() { return fWeightGradients; }
164
165 const Matrix_t &GetWeightGradientsAt(size_t i) const { return fWeightGradients[i]; }
167
168 const std::vector<Matrix_t> &GetBiasGradients() const { return fBiasGradients; }
169 std::vector<Matrix_t> &GetBiasGradients() { return fBiasGradients; }
170
171 const Matrix_t &GetBiasGradientsAt(size_t i) const { return fBiasGradients[i]; }
173
174 const std::vector<Matrix_t> &GetOutput() const { return fOutput; }
175 std::vector<Matrix_t> &GetOutput() { return fOutput; }
176
177 const std::vector<Matrix_t> &GetActivationGradients() const { return fActivationGradients; }
178 std::vector<Matrix_t> &GetActivationGradients() { return fActivationGradients; }
179
180 Matrix_t &GetOutputAt(size_t i) { return fOutput[i]; }
181 const Matrix_t &GetOutputAt(size_t i) const { return fOutput[i]; }
182
184 const Matrix_t &GetActivationGradientsAt(size_t i) const { return fActivationGradients[i]; }
185
187
188 /*! Setters */
189 void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
190 void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
191 void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
192 void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
193 void SetDepth(size_t depth) { fDepth = depth; }
194 void SetHeight(size_t height) { fHeight = height; }
195 void SetWidth(size_t width) { fWidth = width; }
196 void SetIsTraining(bool isTraining) { fIsTraining = isTraining; }
197
198 /// helper functions for XML
199 void WriteTensorToXML( void * node, const char * name, const std::vector<Matrix_t> & tensor);
200 void WriteMatrixToXML( void * node, const char * name, const Matrix_t & matrix);
201
202 void ReadMatrixXML( void * node, const char * name, Matrix_t & matrix);
203
204};
205
206//
207//
208// The General Layer Class - Implementation
209//_________________________________________________________________________________________________
210template <typename Architecture_t>
211VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
212 size_t depth, size_t height, size_t width, size_t weightsNSlices,
213 size_t weightsNRows, size_t weightsNCols, size_t biasesNSlices,
214 size_t biasesNRows, size_t biasesNCols, size_t outputNSlices,
215 size_t outputNRows, size_t outputNCols, EInitialization init)
216 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
217 fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
218 fOutput(), fActivationGradients(), fInit(init)
219{
220
221 for (size_t i = 0; i < weightsNSlices; i++) {
222 fWeights.emplace_back(weightsNRows, weightsNCols);
223 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
224 }
225
226 for (size_t i = 0; i < biasesNSlices; i++) {
227 fBiases.emplace_back(biasesNRows, biasesNCols);
228 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
229 }
230
231 for (size_t i = 0; i < outputNSlices; i++) {
232 fOutput.emplace_back(outputNRows, outputNCols);
233 fActivationGradients.emplace_back(outputNRows, outputNCols);
234 }
235}
236
237//_________________________________________________________________________________________________
238template <typename Architecture_t>
239VGeneralLayer<Architecture_t>::VGeneralLayer(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
240 size_t depth, size_t height, size_t width, size_t weightsNSlices,
241 std::vector<size_t> weightsNRows, std::vector<size_t> weightsNCols,
242 size_t biasesNSlices, std::vector<size_t> biasesNRows,
243 std::vector<size_t> biasesNCols, size_t outputNSlices, size_t outputNRows,
244 size_t outputNCols, EInitialization init)
245 : fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth), fDepth(depth),
246 fHeight(height), fWidth(width), fIsTraining(true), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(),
247 fOutput(), fActivationGradients(), fInit(init)
248{
249
250 for (size_t i = 0; i < weightsNSlices; i++) {
251 fWeights.emplace_back(weightsNRows[i], weightsNCols[i]);
252 fWeightGradients.emplace_back(weightsNRows[i], weightsNCols[i]);
253 }
254
255 for (size_t i = 0; i < biasesNSlices; i++) {
256 fBiases.emplace_back(biasesNRows[i], biasesNCols[i]);
257 fBiasGradients.emplace_back(biasesNRows[i], biasesNCols[i]);
258 }
259
260 for (size_t i = 0; i < outputNSlices; i++) {
261 fOutput.emplace_back(outputNRows, outputNCols);
262 fActivationGradients.emplace_back(outputNRows, outputNCols);
263 }
264}
265
266//_________________________________________________________________________________________________
267template <typename Architecture_t>
269 : fBatchSize(layer->GetBatchSize()), fInputDepth(layer->GetInputDepth()), fInputHeight(layer->GetInputHeight()),
270 fInputWidth(layer->GetInputWidth()), fDepth(layer->GetDepth()), fHeight(layer->GetHeight()),
271 fWidth(layer->GetWidth()), fIsTraining(layer->IsTraining()), fWeights(), fBiases(), fWeightGradients(),
272 fBiasGradients(), fOutput(), fActivationGradients(), fInit(layer->GetInitialization())
273{
274 size_t weightsNSlices = (layer->GetWeights()).size();
275 size_t weightsNRows = 0;
276 size_t weightsNCols = 0;
277
278 for (size_t i = 0; i < weightsNSlices; i++) {
279 weightsNRows = (layer->GetWeightsAt(i)).GetNrows();
280 weightsNCols = (layer->GetWeightsAt(i)).GetNcols();
281
282 fWeights.emplace_back(weightsNRows, weightsNCols);
283 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
284
286 }
287
288 size_t biasesNSlices = (layer->GetBiases()).size();
289 size_t biasesNRows = 0;
290 size_t biasesNCols = 0;
291
292 for (size_t i = 0; i < biasesNSlices; i++) {
293 biasesNRows = (layer->GetBiasesAt(i)).GetNrows();
294 biasesNCols = (layer->GetBiasesAt(i)).GetNcols();
295
296 fBiases.emplace_back(biasesNRows, biasesNCols);
297 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
298
300 }
301
302 size_t outputNSlices = (layer->GetOutput()).size();
303 size_t outputNRows = 0;
304 size_t outputNCols = 0;
305
306 for (size_t i = 0; i < outputNSlices; i++) {
307 outputNRows = (layer->GetOutputAt(i)).GetNrows();
308 outputNCols = (layer->GetOutputAt(i)).GetNcols();
309
310 fOutput.emplace_back(outputNRows, outputNCols);
311 fActivationGradients.emplace_back(outputNRows, outputNCols);
312 }
313}
314
315//_________________________________________________________________________________________________
316template <typename Architecture_t>
318 : fBatchSize(layer.fBatchSize), fInputDepth(layer.fInputDepth), fInputHeight(layer.fInputHeight),
319 fInputWidth(layer.fInputWidth), fDepth(layer.fDepth), fHeight(layer.fHeight), fWidth(layer.fWidth),
320 fIsTraining(layer.fIsTraining), fWeights(), fBiases(), fWeightGradients(), fBiasGradients(), fOutput(),
321 fActivationGradients(), fInit(layer.fInit)
322{
323 size_t weightsNSlices = layer.fWeights.size();
324 size_t weightsNRows = 0;
325 size_t weightsNCols = 0;
326
327 for (size_t i = 0; i < weightsNSlices; i++) {
328 weightsNRows = (layer.fWeights[i]).GetNrows();
329 weightsNCols = (layer.fWeights[i]).GetNcols();
330
331 fWeights.emplace_back(weightsNRows, weightsNCols);
332 fWeightGradients.emplace_back(weightsNRows, weightsNCols);
333
335 }
336
337 size_t biasesNSlices = layer.fBiases.size();
338 size_t biasesNRows = 0;
339 size_t biasesNCols = 0;
340
341 for (size_t i = 0; i < biasesNSlices; i++) {
342 biasesNRows = (layer.fBiases[i]).GetNrows();
343 biasesNCols = (layer.fBiases[i]).GetNcols();
344
345 fBiases.emplace_back(biasesNRows, biasesNCols);
346 fBiasGradients.emplace_back(biasesNRows, biasesNCols);
347
349 }
350
351 size_t outputNSlices = layer.fOutput.size();
352 size_t outputNRows = 0;
353 size_t outputNCols = 0;
354
355 for (size_t i = 0; i < outputNSlices; i++) {
356 outputNRows = (layer.fOutput[i]).GetNrows();
357 outputNCols = (layer.fOutput[i]).GetNcols();
358
359 fOutput.emplace_back(outputNRows, outputNCols);
360 fActivationGradients.emplace_back(outputNRows, outputNCols);
361 }
362}
363
364//_________________________________________________________________________________________________
365template <typename Architecture_t>
367{
368 // Nothing to do here.
369}
370
371//_________________________________________________________________________________________________
372template <typename Architecture_t>
374{
375 for (size_t i = 0; i < fWeights.size(); i++) {
376 initialize<Architecture_t>(fWeights[i], this->GetInitialization());
377 initialize<Architecture_t>(fWeightGradients[i], EInitialization::kZero);
378 }
379
380 for (size_t i = 0; i < fBiases.size(); i++) {
381 initialize<Architecture_t>(fBiases[i], EInitialization::kZero);
382 initialize<Architecture_t>(fBiasGradients[i], EInitialization::kZero);
383 }
384}
385
386//_________________________________________________________________________________________________
387template <typename Architecture_t>
388auto VGeneralLayer<Architecture_t>::Update(const Scalar_t learningRate) -> void
389{
390 this->UpdateWeights(fWeightGradients, learningRate);
391 this->UpdateBiases(fBiasGradients, learningRate);
392}
393
394//_________________________________________________________________________________________________
395template <typename Architecture_t>
396auto VGeneralLayer<Architecture_t>::UpdateWeights(const std::vector<Matrix_t> &weightGradients,
397 const Scalar_t learningRate) -> void
398{
399 for (size_t i = 0; i < fWeights.size(); i++) {
400 Architecture_t::ScaleAdd(fWeights[i], weightGradients[i], -learningRate);
401 }
402}
403
404//_________________________________________________________________________________________________
405template <typename Architecture_t>
406auto VGeneralLayer<Architecture_t>::UpdateBiases(const std::vector<Matrix_t> &biasGradients,
407 const Scalar_t learningRate) -> void
408{
409 for (size_t i = 0; i < fBiases.size(); i++) {
410 Architecture_t::ScaleAdd(fBiases[i], biasGradients[i], -learningRate);
411 }
412}
413
414//_________________________________________________________________________________________________
415template <typename Architecture_t>
416auto VGeneralLayer<Architecture_t>::UpdateWeightGradients(const std::vector<Matrix_t> &weightGradients,
417 const Scalar_t learningRate) -> void
418{
419 for (size_t i = 0; i < fWeightGradients.size(); i++) {
420 Architecture_t::ScaleAdd(fWeightGradients[i], weightGradients[i], -learningRate);
421 }
422}
423
424//_________________________________________________________________________________________________
425template <typename Architecture_t>
426auto VGeneralLayer<Architecture_t>::UpdateBiasGradients(const std::vector<Matrix_t> &biasGradients,
427 const Scalar_t learningRate) -> void
428{
429 for (size_t i = 0; i < fBiasGradients.size(); i++) {
430 Architecture_t::ScaleAdd(fBiasGradients[i], biasGradients[i], -learningRate);
431 }
432}
433
434//_________________________________________________________________________________________________
435template <typename Architecture_t>
436auto VGeneralLayer<Architecture_t>::CopyWeights(const std::vector<Matrix_t> &otherWeights) -> void
437{
438
439 for (size_t i = 0; i < fWeights.size(); i++) {
440 Architecture_t::Copy(fWeights[i], otherWeights[i]);
441 }
442}
443
444//_________________________________________________________________________________________________
445template <typename Architecture_t>
446auto VGeneralLayer<Architecture_t>::CopyBiases(const std::vector<Matrix_t> &otherBiases) -> void
447{
448 for (size_t i = 0; i < fBiases.size(); i++) {
449 Architecture_t::Copy(fBiases[i], otherBiases[i]);
450 }
451}
452
453
454//_________________________________________________________________________________________________
455template <typename Architecture_t>
456auto VGeneralLayer<Architecture_t>::WriteTensorToXML(void * node, const char * name, const std::vector<Matrix_t> & tensor) -> void
457{
458 auto xmlengine = gTools().xmlengine();
459 void* matnode = xmlengine.NewChild(node, 0, name);
460 if (tensor.size() == 0) return;
461 xmlengine.NewAttr(matnode,0,"Depth", gTools().StringFromInt(tensor.size()) );
462 // assume same number of rows and columns for every matrix in std::vector
463 xmlengine.NewAttr(matnode,0,"Rows", gTools().StringFromInt(tensor[0].GetNrows()) );
464 xmlengine.NewAttr(matnode,0,"Columns", gTools().StringFromInt(tensor[0].GetNcols()) );
465 std::stringstream s;
466 for (size_t i = 0; i < tensor.size(); ++i) {
467 auto & mat = tensor[i];
468 for (Int_t row = 0; row < mat.GetNrows(); row++) {
469 for (Int_t col = 0; col < mat.GetNcols(); col++) {
470 TString tmp = TString::Format( "%5.15e ", (mat)(row,col) );
471 s << tmp.Data();
472 }
473 }
474 }
475 xmlengine.AddRawLine( matnode, s.str().c_str() );
476}
477
478//_________________________________________________________________________________________________
479template <typename Architecture_t>
480auto VGeneralLayer<Architecture_t>::WriteMatrixToXML(void * node, const char * name, const Matrix_t & matrix) -> void
481{
482 auto xmlengine = gTools().xmlengine();
483 void* matnode = xmlengine.NewChild(node, 0, name);
484
485 xmlengine.NewAttr(matnode,0,"Rows", gTools().StringFromInt(matrix.GetNrows()) );
486 xmlengine.NewAttr(matnode,0,"Columns", gTools().StringFromInt(matrix.GetNcols()) );
487 std::stringstream s;
488 s.precision( std::numeric_limits<Scalar_t>::digits10 );
489 size_t nrows = matrix.GetNrows();
490 size_t ncols = matrix.GetNcols();
491 for (size_t row = 0; row < nrows; row++) {
492 for (size_t col = 0; col < ncols; col++) {
493 //TString tmp = TString::Format( "%5.15e ", matrix(row,col) );
494 s << std::scientific << matrix(row,col) << " ";
495 }
496 }
497
498 xmlengine.AddRawLine( matnode, s.str().c_str() );
499}
500
501//_________________________________________________________________________________________________
502template <typename Architecture_t>
503auto VGeneralLayer<Architecture_t>::ReadMatrixXML(void * node, const char * name, Matrix_t & matrix) -> void
504{
505 void *matrixXML = gTools().GetChild(node, name);
506 size_t rows, cols;
507 gTools().ReadAttr(matrixXML, "Rows", rows);
508 gTools().ReadAttr(matrixXML, "Columns", cols);
509
510 R__ASSERT((size_t) matrix.GetNrows() == rows);
511 R__ASSERT((size_t) matrix.GetNcols() == cols);
512
513 const char * matrixString = gTools().xmlengine().GetNodeContent(matrixXML);
514 std::stringstream matrixStringStream(matrixString);
515
516 for (size_t i = 0; i < rows; i++)
517 {
518 for (size_t j = 0; j < cols; j++)
519 {
520#ifndef R__HAS_TMVAGPU
521 matrixStringStream >> matrix(i,j);
522#else
523 Scalar_t value;
524 matrixStringStream >> value;
525 matrix(i,j) = value;
526#endif
527
528 }
529 }
530}
531
532} // namespace DNN
533} // namespace TMVA
534
535#endif
static Int_t init()
int Int_t
Definition: RtypesCore.h:41
include TDocParser_001 C image html pict1_TDocParser_001 png width
Definition: TDocParser.cxx:121
#define R__ASSERT(e)
Definition: TError.h:96
Generic General Layer class.
Definition: GeneralLayer.h:46
std::vector< Matrix_t > fWeightGradients
Gradients w.r.t. the weights of the layer.
Definition: GeneralLayer.h:66
const std::vector< Matrix_t > & GetWeightGradients() const
Definition: GeneralLayer.h:162
const Matrix_t & GetWeightsAt(size_t i) const
Definition: GeneralLayer.h:153
void SetHeight(size_t height)
Definition: GeneralLayer.h:194
void UpdateWeightGradients(const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
Updates the weight gradients, given some other weight gradients and learning rate.
Definition: GeneralLayer.h:416
void Initialize()
Initialize the weights and biases according to the given initialization method.
Definition: GeneralLayer.h:373
Matrix_t & GetBiasesAt(size_t i)
Definition: GeneralLayer.h:160
void SetInputHeight(size_t inputHeight)
Definition: GeneralLayer.h:191
std::vector< Matrix_t > fBiasGradients
Gradients w.r.t. the bias values of the layer.
Definition: GeneralLayer.h:67
void SetDepth(size_t depth)
Definition: GeneralLayer.h:193
virtual void ReadWeightsFromXML(void *parent)=0
Read the information and the weights about the layer from XML node.
virtual void Backward(std::vector< Matrix_t > &gradients_backward, const std::vector< Matrix_t > &activations_backward, std::vector< Matrix_t > &inp1, std::vector< Matrix_t > &inp2)=0
Backpropagates the error.
void UpdateBiasGradients(const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
Updates the bias gradients, given some other weight gradients and learning rate.
Definition: GeneralLayer.h:426
void SetBatchSize(size_t batchSize)
Setters.
Definition: GeneralLayer.h:189
void CopyWeights(const std::vector< Matrix_t > &otherWeights)
Copies the weights provided as an input.
Definition: GeneralLayer.h:436
size_t fBatchSize
Batch size used for training and evaluation.
Definition: GeneralLayer.h:51
virtual void AddWeightsXMLTo(void *parent)=0
Writes the information and the weights about the layer in an XML node.
std::vector< Matrix_t > fActivationGradients
Gradients w.r.t. the activations of this layer.
Definition: GeneralLayer.h:70
void UpdateWeights(const std::vector< Matrix_t > &weightGradients, const Scalar_t learningRate)
Updates the weights, given the gradients and the learning rate,.
Definition: GeneralLayer.h:396
typename Architecture_t::Matrix_t Matrix_t
Definition: GeneralLayer.h:47
const std::vector< Matrix_t > & GetBiasGradients() const
Definition: GeneralLayer.h:168
void SetInputDepth(size_t inputDepth)
Definition: GeneralLayer.h:190
const std::vector< Matrix_t > & GetWeights() const
Definition: GeneralLayer.h:150
size_t GetDepth() const
Definition: GeneralLayer.h:145
std::vector< Matrix_t > & GetWeights()
Definition: GeneralLayer.h:151
size_t fWidth
The width of this layer.
Definition: GeneralLayer.h:59
EInitialization fInit
The initialization method.
Definition: GeneralLayer.h:72
std::vector< Matrix_t > fBiases
The biases associated to the layer.
Definition: GeneralLayer.h:64
void SetIsTraining(bool isTraining)
Definition: GeneralLayer.h:196
size_t fInputWidth
The width of the previous layer or input.
Definition: GeneralLayer.h:55
size_t fHeight
The height of the layer.
Definition: GeneralLayer.h:58
virtual void Print() const =0
Prints the info about the layer.
std::vector< Matrix_t > fOutput
Activations of this layer.
Definition: GeneralLayer.h:69
size_t fInputDepth
The depth of the previous layer or input.
Definition: GeneralLayer.h:53
void SetWidth(size_t width)
Definition: GeneralLayer.h:195
bool fIsTraining
Flag indicating the mode.
Definition: GeneralLayer.h:61
Matrix_t & GetOutputAt(size_t i)
Definition: GeneralLayer.h:180
const std::vector< Matrix_t > & GetBiases() const
Definition: GeneralLayer.h:156
typename Architecture_t::Scalar_t Scalar_t
Definition: GeneralLayer.h:48
std::vector< Matrix_t > & GetBiasGradients()
Definition: GeneralLayer.h:169
std::vector< Matrix_t > fWeights
The weights associated to the layer.
Definition: GeneralLayer.h:63
EInitialization GetInitialization() const
Definition: GeneralLayer.h:186
Matrix_t & GetWeightsAt(size_t i)
Definition: GeneralLayer.h:154
Matrix_t & GetBiasGradientsAt(size_t i)
Definition: GeneralLayer.h:172
std::vector< Matrix_t > & GetActivationGradients()
Definition: GeneralLayer.h:178
size_t GetInputDepth() const
Definition: GeneralLayer.h:142
const Matrix_t & GetActivationGradientsAt(size_t i) const
Definition: GeneralLayer.h:184
std::vector< Matrix_t > & GetBiases()
Definition: GeneralLayer.h:157
void WriteMatrixToXML(void *node, const char *name, const Matrix_t &matrix)
Definition: GeneralLayer.h:480
std::vector< Matrix_t > & GetWeightGradients()
Definition: GeneralLayer.h:163
const std::vector< Matrix_t > & GetActivationGradients() const
Definition: GeneralLayer.h:177
size_t fInputHeight
The height of the previous layer or input.
Definition: GeneralLayer.h:54
size_t fDepth
The depth of the layer.
Definition: GeneralLayer.h:57
const std::vector< Matrix_t > & GetOutput() const
Definition: GeneralLayer.h:174
void CopyBiases(const std::vector< Matrix_t > &otherBiases)
Copies the biases provided as an input.
Definition: GeneralLayer.h:446
std::vector< Matrix_t > & GetOutput()
Definition: GeneralLayer.h:175
void Update(const Scalar_t learningRate)
Updates the weights and biases, given the learning rate.
Definition: GeneralLayer.h:388
const Matrix_t & GetBiasesAt(size_t i) const
Definition: GeneralLayer.h:159
virtual void Forward(std::vector< Matrix_t > &input, bool applyDropout=false)=0
Computes activation of the layer for the given input.
size_t GetInputHeight() const
Definition: GeneralLayer.h:143
void SetInputWidth(size_t inputWidth)
Definition: GeneralLayer.h:192
const Matrix_t & GetBiasGradientsAt(size_t i) const
Definition: GeneralLayer.h:171
void WriteTensorToXML(void *node, const char *name, const std::vector< Matrix_t > &tensor)
helper functions for XML
Definition: GeneralLayer.h:456
size_t GetBatchSize() const
Getters.
Definition: GeneralLayer.h:141
Matrix_t & GetWeightGradientsAt(size_t i)
Definition: GeneralLayer.h:166
void ReadMatrixXML(void *node, const char *name, Matrix_t &matrix)
Definition: GeneralLayer.h:503
size_t GetWidth() const
Definition: GeneralLayer.h:147
size_t GetHeight() const
Definition: GeneralLayer.h:146
const Matrix_t & GetWeightGradientsAt(size_t i) const
Definition: GeneralLayer.h:165
void UpdateBiases(const std::vector< Matrix_t > &biasGradients, const Scalar_t learningRate)
Updates the biases, given the gradients and the learning rate.
Definition: GeneralLayer.h:406
virtual ~VGeneralLayer()
Virtual Destructor.
Definition: GeneralLayer.h:366
const Matrix_t & GetOutputAt(size_t i) const
Definition: GeneralLayer.h:181
Matrix_t & GetActivationGradientsAt(size_t i)
Definition: GeneralLayer.h:183
VGeneralLayer(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t Depth, size_t Height, size_t Width, size_t WeightsNSlices, size_t WeightsNRows, size_t WeightsNCols, size_t BiasesNSlices, size_t BiasesNRows, size_t BiasesNCols, size_t OutputNSlices, size_t OutputNRows, size_t OutputNCols, EInitialization Init)
Constructor.
Definition: GeneralLayer.h:211
size_t GetInputWidth() const
Definition: GeneralLayer.h:144
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TXMLEngine & xmlengine()
Definition: Tools.h:270
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:337
Basic string class.
Definition: TString.h:131
const char * Data() const
Definition: TString.h:364
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2286
XMLNodePointer_t NewChild(XMLNodePointer_t parent, XMLNsPointer_t ns, const char *name, const char *content=0)
create new child element for parent node
Definition: TXMLEngine.cxx:707
const char * GetNodeContent(XMLNodePointer_t xmlnode)
get contents (if any) of xmlnode
void Copy(void *source, void *dest)
void Init(TClassEdit::TInterpreterLookupHelper *helper)
Definition: TClassEdit.cxx:121
static constexpr double s
EInitialization
Definition: Functions.h:70
UInt_t Depth(const Node< T > *node)
Definition: NodekNN.h:213
Abstract ClassifierFactory template that handles arbitrary types.
Tools & gTools()