27#ifndef TMVA_DNN_DLMINIMIZERS
28#define TMVA_DNN_DLMINIMIZERS
64template <
typename Architecture_t>
68 using Scalar_t =
typename Architecture_t::Scalar_t;
69 using Matrix_t =
typename Architecture_t::Matrix_t;
163template <
typename Architecture_t>
165 : fBatchSize(0), fStepCount(0), fConvergenceSteps(0), fConvergenceCount(0), fTestInterval(0), fLearningRate(0),
166 fMinimumError(std::numeric_limits<
Scalar_t>::infinity())
172template <
typename Architecture_t>
175 : fBatchSize(0), fStepCount(0), fConvergenceSteps(convergenceSteps), fConvergenceCount(0),
176 fTestInterval(testInterval), fLearningRate(learningRate), fMinimumError(std::numeric_limits<
Scalar_t>::infinity())
182template <
typename Architecture_t>
189 deepNet.
Update(fLearningRate);
193template <
typename Architecture_t>
201 for (
size_t i = 0; i < deepNet.
GetDepth(); i++) {
204 layer->UpdateWeights(layer->GetWeightGradients(), fLearningRate);
206 layer->UpdateBiases(layer->GetBiasGradients(), fLearningRate);
212template <
typename Architecture_t>
217 deepNet.Backward(input,
output, weights);
218 deepNet.Update(fLearningRate);
224template <
typename Architecture_t>
230 fTrainingError = loss;
231 deepNet.Backward(input,
output, weights);
233 for (
size_t i = 0; i < deepNet.GetDepth(); i++) {
234 auto *layer = deepNet.GetLayerAt(i);
236 layer->UpdateWeights(layer->GetWeightGradients(), fLearningRate);
238 layer->UpdateBiases(layer->GetBiasGradients(), fLearningRate);
246template <
typename Architecture_t>
251 master.ParallelForward(nets, batches);
252 master.ParallelBackward(nets, batches, fLearningRate);
256template <
typename Architecture_t>
261 master.ParallelForward(nets, batches);
262 master.ParallelBackwardMomentum(nets, batches, fLearningRate, momentum);
266template <
typename Architecture_t>
271 master.ParallelForward(nets, batches);
272 master.ParallelBackwardNestorov(nets, batches, fLearningRate, momentum);
276template <
typename Architecture_t>
279 if (fTestError < fMinimumError * 0.999) {
280 fConvergenceCount = 0;
281 fMinimumError = fTestError;
286 return (fConvergenceCount >= fConvergenceSteps);
290template <
typename Architecture_t>
293 fTestError = testError;
294 if (fTestError < fMinimumError * 0.999) {
295 fConvergenceCount = 0;
296 fMinimumError = fTestError;
298 fConvergenceCount += fTestInterval;
300 return (fConvergenceCount >= fConvergenceSteps);
Scalar_t fMinimumError
The minimum loss achieved on the test set so far (updated in HasConverged from fTestError).
void SetBatchSize(Scalar_t rate)
typename Architecture_t::Scalar_t Scalar_t
Scalar_t fTestError
Holds the most recently computed test loss.
void StepNesterov(DeepNet_t &master, std::vector< DeepNet_t > &nets, std::vector< TTensorBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses Nesterov momentum.
size_t GetConvergenceSteps() const
bool HasConverged()
Increases the minimization step counter by the test error evaluation period and uses the current internal value of the test error to determine whether the minimization has converged.
void SetTestInterval(size_t interval)
Scalar_t StepReducedWeightsLoss(DeepNet_t &deepNet, std::vector< Matrix_t > &input, const Matrix_t &output, const Matrix_t &weights)
Similar to StepReducedWeights(...) but also evaluates the loss.
size_t fStepCount
Number of steps performed in the current training session.
void Reset()
Reset minimizer object to default state.
size_t fBatchSize
Batch size to use for the training.
void StepReducedWeights(DeepNet_t &deepNet, std::vector< Matrix_t > &input, const Matrix_t &output, const Matrix_t &weights)
Does not evaluate the loss and therefore does not trigger a possible synchronization with the device.
void SetConvergenceSteps(size_t steps)
Setters.
size_t fConvergenceCount
Current number of training epochs without a considerable decrease in the test error.
void SetLearningRate(Scalar_t rate)
Scalar_t StepLoss(DeepNet_t &deepNet, std::vector< Matrix_t > &input, const Matrix_t &output, const Matrix_t &weights)
Same as Step(...) but also evaluate the loss on the given training data.
size_t fConvergenceSteps
Number of training epochs without a considerable decrease in the test error that is required for convergence.
TDeepNet< Architecture_t > DeepNet_t
size_t GetTestInterval() const
size_t GetConvergenceCount() const
Getters.
size_t fTestInterval
Interval for the computation of the test error.
typename Architecture_t::Matrix_t Matrix_t
Scalar_t GetTrainingError() const
Scalar_t fLearningRate
Learning rate used for the minimization steps.
Scalar_t fTrainingError
Holds the most recently computed training loss.
void Step(DeepNet_t &deepNet, std::vector< Matrix_t > &input, const Matrix_t &output, const Matrix_t &weights)
Perform a single optimization step on a given batch.
Scalar_t GetTestError() const
void StepMomentum(DeepNet_t &master, std::vector< DeepNet_t > &nets, std::vector< TTensorBatch< Architecture_t > > &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses momentum.
Generic Deep Neural Network class.
void Forward(Tensor_t &input, bool applyDropout=false)
Function that executes the entire forward pass in the network.
void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights)
Function that executes the entire backward pass in the network.
Layer_t * GetLayerAt(size_t i)
Get the layer in the vector of layers at position i.
void Update(Scalar_t learningRate)
Function that will update the weights and biases in the layers that contain weights and biases.
create variable transformations
static void output(int code)